diff --git a/.github/workflows/scripts/check_clang_tidy.sh b/.github/workflows/scripts/check_clang_tidy.sh
index d9db1c9a3..06cbb0c74 100755
--- a/.github/workflows/scripts/check_clang_tidy.sh
+++ b/.github/workflows/scripts/check_clang_tidy.sh
@@ -2,11 +2,11 @@
CI_SETUP_CMAKE_ARGS=$1
-cd taichi
+cd test_actions
python3 -m pip install -r requirements_dev.txt
rm -rf build && mkdir build && cd build
cmake $CI_SETUP_CMAKE_ARGS ..
cd ..
-python3 ./scripts/run_clang_tidy.py $PWD/taichi -clang-tidy-binary clang-tidy-10 -checks=-*,performance-inefficient-string-concatenation,readability-identifier-naming -header-filter=$PWD/taichi -p $PWD/build -j2
+python3 ./scripts/run_clang_tidy.py $PWD/test_actions -clang-tidy-binary clang-tidy-10 -checks=-*,performance-inefficient-string-concatenation,readability-identifier-naming -header-filter=$PWD/test_actions -p $PWD/build -j2
diff --git a/.github/workflows/scripts/unix_build.sh b/.github/workflows/scripts/unix_build.sh
index e29cfe704..6e975d9bc 100755
--- a/.github/workflows/scripts/unix_build.sh
+++ b/.github/workflows/scripts/unix_build.sh
@@ -11,7 +11,7 @@ check_in_docker() {
}
IN_DOCKER=$(check_in_docker)
-[[ "$IN_DOCKER" == "true" ]] && cd taichi
+[[ "$IN_DOCKER" == "true" ]] && cd test_actions
setup_sccache() {
export SCCACHE_DIR=$(pwd)/sccache_cache
diff --git a/.github/workflows/scripts/unix_test.sh b/.github/workflows/scripts/unix_test.sh
index 54d99071f..f2248c864 100755
--- a/.github/workflows/scripts/unix_test.sh
+++ b/.github/workflows/scripts/unix_test.sh
@@ -20,7 +20,7 @@ fi
python3 -m pip install dist/*.whl
if [ -z "$GPU_TEST" ]; then
python3 -m pip install -r requirements_test.txt
- python3 -m pip install torch
+ #python3 -m pip install torch
else
## Only GPU machine uses system python.
export PATH=$PATH:$HOME/.local/bin
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index 368e00588..10c310c48 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -136,330 +136,329 @@ jobs:
docker run -id --user dev --name check_clang_tidy ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0 /bin/bash
tar -cf - ../${{ github.event.repository.name }} --mode u=+rwx,g=+rwx,o=+rwx --owner 1000 --group 1000 | docker cp - check_clang_tidy:/home/dev/
docker exec --user root check_clang_tidy apt install -y clang-tidy-10
- docker exec --user dev check_clang_tidy /home/dev/taichi/.github/workflows/scripts/check_clang_tidy.sh "$CI_SETUP_CMAKE_ARGS"
+ docker exec --user dev check_clang_tidy /home/dev/test_actions/.github/workflows/scripts/check_clang_tidy.sh "$CI_SETUP_CMAKE_ARGS"
env:
CR_PAT: ${{ secrets.GITHUB_TOKEN }}
CI_SETUP_CMAKE_ARGS: -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CC:BOOL=ON -DTI_WITH_VULKAN:BOOL=OFF -DTI_BUILD_TESTS:BOOL=OFF
- build_and_test_cpu_linux:
- name: Build and Test linux (CPU)
- needs: [check_code_format, check_files]
- timeout-minutes: 60
- strategy:
- matrix:
- include:
- - os: ubuntu-latest
- python: py36
- with_cc: OFF
- wanted_archs: "cpu"
- - os: ubuntu-latest
- python: py39
- with_cc: ON
- wanted_archs: "cpu,cc"
- runs-on: ${{ matrix.os }}
- permissions:
- packages: read
- contents: read
- steps:
- - uses: actions/checkout@v2
- with:
- submodules: "recursive"
-
- - name: Get sccache cache
- uses: actions/cache@v2
- with:
- path: sccache_cache
- key: sccache-linux-${{matrix.with_cc}}-${{ github.sha }}
- restore-keys: |
- sccache-linux-${{matrix.with_cc}}-
-
- - name: Get docker images
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- # https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio
- echo $CR_PAT | docker login ghcr.io -u ${{ github.actor }} --password-stdin
- docker pull ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0
- env:
- CR_PAT: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Build
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- mkdir -m777 shared
- docker create --user dev --name taichi_build \
- -e PY -e PROJECT_NAME -e TAICHI_CMAKE_ARGS \
- ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0 \
- /home/dev/taichi/.github/workflows/scripts/unix_build.sh
- # A tarball is needed because sccache needs some permissions that only the file owner has.
- # 1000 is the uid and gid of user "dev" in the container.
- # If the uid or gid of the user inside the docker changes, please change the uid and gid in the following line.
- tar -cf - ../${{ github.event.repository.name }} --mode u=+rwx,g=+rwx,o=+rwx --owner 1000 --group 1000 | docker cp - taichi_build:/home/dev/
- docker start -a taichi_build
- rm -rf sccache_cache
- docker cp taichi_build:/home/dev/taichi/sccache_cache sccache_cache
- docker cp taichi_build:/home/dev/taichi/dist shared/dist
- docker cp taichi_build:/home/dev/taichi/build shared/build
- env:
- PY: ${{ matrix.python }}
- PROJECT_NAME: taichi
- TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CC:BOOL=${{ matrix.with_cc }} -DTI_WITH_VULKAN:BOOL=OFF -DTI_BUILD_TESTS:BOOL=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
-
- - name: Test
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- docker create --user dev --name taichi_test -e PY -e TI_WANTED_ARCHS ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0 /home/dev/unix_test.sh
- docker cp .github/workflows/scripts/unix_test.sh taichi_test:/home/dev/unix_test.sh
- docker cp shared/dist/ taichi_test:/home/dev/
- docker cp shared/build/ taichi_test:/home/dev/
- docker cp ./requirements_test.txt taichi_test:/home/dev/requirements_test.txt
- docker cp tests/ taichi_test:/home/dev/
- docker start -a taichi_test
- env:
- PY: ${{ matrix.python }}
- TI_WANTED_ARCHS: ${{ matrix.wanted_archs }}
-
- - name: clean docker container
- if: always()
- run: |
- docker rm taichi_build taichi_test -f
-
- build_and_test_cpu_mac:
- name: Build and Test macos (CPU)
- needs: [check_code_format, check_files]
- timeout-minutes: 60
- strategy:
- matrix:
- include:
- - os: macos-10.15
- python: 3.7
- with_cc: OFF
- with_cpp_tests: ON
- wanted_archs: "cpu"
- runs-on: ${{ matrix.os }}
- steps:
- - uses: actions/checkout@v2
- with:
- submodules: "recursive"
-
- - uses: actions/setup-python@v2
- with:
- python-version: ${{ matrix.python }}
-
- - name: Get sccache cache
- uses: actions/cache@v2
- with:
- path: sccache_cache
- key: sccache-mac-${{ github.sha }}
- restore-keys: |
- sccache-mac-
-
- - name: Download Pre-Built LLVM 10.0.0
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- python misc/ci_download.py
- env:
- CI_PLATFORM: ${{ matrix.os }}
-
- - name: Build & Install
- run: |
- brew install molten-vk
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- mkdir -p sccache_cache
- export PATH=`pwd`/taichi-llvm/bin/:$PATH
- .github/workflows/scripts/unix_build.sh
- brew uninstall molten-vk
- env:
- TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CC:BOOL=${{ matrix.with_cc }} -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=${{ matrix.with_cpp_tests }} -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- CXX: clang++
- # [DEBUG] Copy this step around to enable debugging inside Github Action instances.
- #- name: Setup tmate session
- # uses: mxschmitt/action-tmate@v3
- # with:
- # limit-access-to-actor: true
-
- - name: Test
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- .github/workflows/scripts/unix_test.sh
- env:
- TI_WANTED_ARCHS: ${{ matrix.wanted_archs }}
-
- build_and_test_gpu_linux:
- name: Build and Test (GPU)
- needs: [check_code_format, check_files]
- runs-on: [self-hosted, cuda, vulkan, cn]
- timeout-minutes: 60
- steps:
- - uses: actions/checkout@v2
- with:
- submodules: "recursive"
-
- - name: Get sccache cache
- uses: actions/cache@v2
- with:
- path: sccache_cache
- key: sccache-linux-gpu-${{ github.sha }}
- restore-keys: |
- sccache-linux-gpu-
-
- - name: Build & Install
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- mkdir -m777 shared
- docker create --user dev --name taichi_build --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix \
- -e PY -e GPU_BUILD -e PROJECT_NAME -e TAICHI_CMAKE_ARGS -e DISPLAY \
- registry.taichigraphics.com/taichidev-ubuntu18.04:v0.1.1 \
- /home/dev/taichi/.github/workflows/scripts/unix_build.sh
- # A tarball is needed because sccache needs some permissions that only the file owner has.
- # 1000 is the uid and gid of user "dev" in the container.
- # If the uid or gid of the user inside the docker changes, please change the uid and gid in the following line.
- tar -cf - ../${{ github.event.repository.name }} --mode u=+rwx,g=+rwx,o=+rwx --owner 1000 --group 1000 | docker cp - taichi_build:/home/dev/
- docker start -a taichi_build
- rm -rf sccache_cache
- docker cp taichi_build:/home/dev/taichi/sccache_cache sccache_cache
- docker cp taichi_build:/home/dev/taichi/dist shared/dist
- docker cp taichi_build:/home/dev/taichi/build shared/build
- env:
- PY: py38
- GPU_BUILD: ON
- PROJECT_NAME: taichi
- TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=ON -DTI_WITH_CC:BOOL=OFF -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- DISPLAY: :1
-
- - name: Test
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- docker create --user dev --name taichi_test --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix \
- -e DISPLAY -e PY -e GPU_TEST -e TI_WANTED_ARCHS \
- registry.taichigraphics.com/taichidev-ubuntu18.04:v0.1.1 \
- /home/dev/unix_test.sh
- docker cp .github/workflows/scripts/unix_test.sh taichi_test:/home/dev/unix_test.sh
- docker cp shared/dist/ taichi_test:/home/dev/
- docker cp shared/build/ taichi_test:/home/dev/
- docker cp tests/ taichi_test:/home/dev/
- docker cp requirements_test.txt taichi_test:/home/dev/requirements_test.txt
- docker start -a taichi_test
- env:
- PY: py38
- GPU_TEST: ON
- DISPLAY: :1
- TI_WANTED_ARCHS: "cpu,cuda,vulkan,opengl"
-
- - name: clean docker container
- if: always()
- run: |
- docker rm taichi_build taichi_test -f
-
- build_and_test_windows:
- name: Build and Test Windows
- needs: [check_code_format, check_files]
- runs-on: [self-hosted, windows, gpu]
- timeout-minutes: 90
- steps:
- - uses: actions/checkout@v2
- with:
- submodules: "recursive"
-
- - uses: actions/setup-python@v2
- with:
- python-version: 3.7
-
- - name: Add Visual Studio Shell to ENV
- uses: egor-tensin/vs-shell@v2
- with:
- arch: x64
-
- - name: Get sccache cache
- uses: actions/cache@v2
- with:
- path: ccache_cache
- key: ccache-win64-${{ github.sha }}
- restore-keys: |
- ccache-win64-
-
- - name: Build
- shell: powershell
- run: |
- if ( "${{needs.check_files.outputs.run_job}}" -eq "false" ) {
- exit 0
- }
- .\.github\workflows\scripts\win_build.ps1 -installVulkan -install -libsDir C:\
-
- - name: Test
- shell: powershell
- run: |
- if ( "${{needs.check_files.outputs.run_job}}" -eq "false" ) {
- exit 0
- }
- .\.github\workflows\scripts\win_test.ps1
- env:
- TI_WANTED_ARCHS: cpu,cuda,opengl
- TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=ON -DTI_WITH_CC:BOOL=OFF
- TI_SKIP_VERSION_CHECK: ON
-
- build_and_test_m1:
- name: Build and Test (Apple M1)
- needs: [check_code_format, check_files]
- timeout-minutes: 60
- strategy:
- matrix:
- include:
- - os: macos-latest
- python: 3.8
- defaults:
- run:
- # https://github.com/actions/runner/issues/805#issuecomment-844426478
- shell: "/usr/bin/arch -arch arm64e /bin/bash --noprofile --norc -eo pipefail {0}"
- runs-on: [self-hosted, m1]
- steps:
- - uses: actions/checkout@v2
- with:
- submodules: "recursive"
-
- - name: Get sccache cache
- uses: actions/cache@v2
- with:
- path: sccache_cache
- key: sccache-m1-${{ github.sha }}
- restore-keys: |
- sccache-m1-
-
- - name: Build
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- export PATH=/Users/github/miniforge3/envs/$PYTHON/bin:$PATH
- brew install molten-vk
- .github/workflows/scripts/unix_build.sh
- env:
- TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CUDA:BOOL=OFF -DTI_WITH_CC:BOOL=OFF -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- PYTHON: ${{ matrix.python }}
- CXX: clang++
-
- - name: Test
- run: |
- if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
- exit 0
- fi
- export PATH=/Users/github/miniforge3/envs/$PYTHON/bin:$PATH
- .github/workflows/scripts/unix_test.sh
- env:
- TI_WANTED_ARCHS: "metal,vulkan,cpu"
- PYTHON: ${{ matrix.python }}
+ build_and_test_cpu_linux:
+ name: Build and Test linux (CPU)
+ needs: [check_code_format, check_files]
+ timeout-minutes: 60
+ strategy:
+ matrix:
+ include:
+ - os: ubuntu-latest
+ python: py36
+ with_cc: OFF
+ wanted_archs: "cpu"
+ - os: ubuntu-latest
+ python: py39
+ with_cc: ON
+ wanted_archs: "cpu,cc"
+ runs-on: ${{ matrix.os }}
+ permissions:
+ packages: read
+ contents: read
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: "recursive"
+
+ - name: Get sccache cache
+ uses: actions/cache@v2
+ with:
+ path: sccache_cache
+ key: sccache-linux-${{matrix.with_cc}}-${{ github.sha }}
+ restore-keys: |
+ sccache-linux-${{matrix.with_cc}}-
+
+ - name: Get docker images
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ # https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio
+          echo "$CR_PAT" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
+ docker pull ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0
+ env:
+ CR_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Build
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ mkdir -m777 shared
+ docker create --user dev --name taichi_build \
+ -e PY -e PROJECT_NAME -e TAICHI_CMAKE_ARGS \
+ ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0 \
+ /home/dev/test_actions/.github/workflows/scripts/unix_build.sh
+ # A tarball is needed because sccache needs some permissions that only the file owner has.
+ # 1000 is the uid and gid of user "dev" in the container.
+ # If the uid or gid of the user inside the docker changes, please change the uid and gid in the following line.
+ tar -cf - ../${{ github.event.repository.name }} --mode u=+rwx,g=+rwx,o=+rwx --owner 1000 --group 1000 | docker cp - taichi_build:/home/dev/
+ docker start -a taichi_build
+ rm -rf sccache_cache
+ docker cp taichi_build:/home/dev/test_actions/sccache_cache sccache_cache
+ docker cp taichi_build:/home/dev/test_actions/dist shared/dist
+ docker cp taichi_build:/home/dev/test_actions/build shared/build
+ env:
+ PY: ${{ matrix.python }}
+ PROJECT_NAME: taichi
+ TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CC:BOOL=${{ matrix.with_cc }} -DTI_WITH_VULKAN:BOOL=OFF -DTI_BUILD_TESTS:BOOL=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+
+ - name: Test
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ docker create --user dev --name taichi_test -e PY -e TI_WANTED_ARCHS ghcr.io/taichi-dev/taichidev-cpu-ubuntu18.04:v0.1.0 /home/dev/unix_test.sh
+ docker cp .github/workflows/scripts/unix_test.sh taichi_test:/home/dev/unix_test.sh
+ docker cp shared/dist/ taichi_test:/home/dev/
+ docker cp shared/build/ taichi_test:/home/dev/
+ docker cp ./requirements_test.txt taichi_test:/home/dev/requirements_test.txt
+ docker cp tests/ taichi_test:/home/dev/
+ docker start -a taichi_test
+ env:
+ PY: ${{ matrix.python }}
+ TI_WANTED_ARCHS: ${{ matrix.wanted_archs }}
+
+ - name: clean docker container
+ if: always()
+ run: |
+ docker rm taichi_build taichi_test -f
+
+ build_and_test_cpu_mac:
+ name: Build and Test macos (CPU)
+ needs: [check_code_format, check_files]
+ timeout-minutes: 60
+ strategy:
+ matrix:
+ include:
+ - os: macos-10.15
+ python: "3.10.0"
+ with_cc: OFF
+ with_cpp_tests: ON
+ wanted_archs: "cpu"
+ runs-on: ${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: "recursive"
+
+ - uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python }}
+
+ - name: Get sccache cache
+ uses: actions/cache@v2
+ with:
+ path: sccache_cache
+ key: sccache-mac-${{ github.sha }}
+ restore-keys: |
+ sccache-mac-
+
+ - name: Download Pre-Built LLVM 10.0.0
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ python misc/ci_download.py
+ env:
+ CI_PLATFORM: ${{ matrix.os }}
+
+ - name: Build & Install
+ run: |
+ brew install molten-vk
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ mkdir -p sccache_cache
+ export PATH=`pwd`/taichi-llvm/bin/:$PATH
+ .github/workflows/scripts/unix_build.sh
+ brew uninstall molten-vk
+ env:
+ TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CC:BOOL=${{ matrix.with_cc }} -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=${{ matrix.with_cpp_tests }} -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+ CXX: clang++
+ # [DEBUG] Copy this step around to enable debugging inside Github Action instances.
+ #- name: Setup tmate session
+ # uses: mxschmitt/action-tmate@v3
+ # with:
+ # limit-access-to-actor: true
+
+ - name: Test
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ .github/workflows/scripts/unix_test.sh
+ env:
+ TI_WANTED_ARCHS: ${{ matrix.wanted_archs }}
+
+ build_and_test_gpu_linux:
+ name: Build and Test (GPU)
+ needs: [check_code_format, check_files]
+ runs-on: [self-hosted, cuda, vulkan, cn]
+ timeout-minutes: 60
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: "recursive"
+
+ - name: Get sccache cache
+ uses: actions/cache@v2
+ with:
+ path: sccache_cache
+ key: sccache-linux-gpu-${{ github.sha }}
+ restore-keys: |
+ sccache-linux-gpu-
+
+ - name: Build & Install
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ mkdir -m777 shared
+ docker create --user dev --name taichi_build --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix \
+ -e PY -e GPU_BUILD -e PROJECT_NAME -e TAICHI_CMAKE_ARGS -e DISPLAY \
+ registry.taichigraphics.com/taichidev-ubuntu18.04:v0.1.1 \
+ /home/dev/test_actions/.github/workflows/scripts/unix_build.sh
+ # A tarball is needed because sccache needs some permissions that only the file owner has.
+ # 1000 is the uid and gid of user "dev" in the container.
+ # If the uid or gid of the user inside the docker changes, please change the uid and gid in the following line.
+ tar -cf - ../${{ github.event.repository.name }} --mode u=+rwx,g=+rwx,o=+rwx --owner 1000 --group 1000 | docker cp - taichi_build:/home/dev/
+ docker start -a taichi_build
+ rm -rf sccache_cache
+ docker cp taichi_build:/home/dev/test_actions/sccache_cache sccache_cache
+ docker cp taichi_build:/home/dev/test_actions/dist shared/dist
+ docker cp taichi_build:/home/dev/test_actions/build shared/build
+ env:
+ PY: py38
+ GPU_BUILD: ON
+ PROJECT_NAME: taichi
+ TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=ON -DTI_WITH_CC:BOOL=OFF -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+ DISPLAY: :1
+
+ - name: Test
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ docker create --user dev --name taichi_test --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix \
+ -e DISPLAY -e PY -e GPU_TEST -e TI_WANTED_ARCHS \
+ registry.taichigraphics.com/taichidev-ubuntu18.04:v0.1.1 \
+ /home/dev/unix_test.sh
+ docker cp .github/workflows/scripts/unix_test.sh taichi_test:/home/dev/unix_test.sh
+ docker cp shared/dist/ taichi_test:/home/dev/
+ docker cp shared/build/ taichi_test:/home/dev/
+ docker cp tests/ taichi_test:/home/dev/
+ docker cp requirements_test.txt taichi_test:/home/dev/requirements_test.txt
+ docker start -a taichi_test
+ env:
+ PY: py38
+ GPU_TEST: ON
+ DISPLAY: :1
+ TI_WANTED_ARCHS: "cpu,cuda,vulkan,opengl"
+
+ - name: clean docker container
+ if: always()
+ run: |
+ docker rm taichi_build taichi_test -f
+
+ build_and_test_windows:
+ name: Build and Test Windows
+ needs: [check_code_format, check_files]
+ runs-on: [self-hosted, windows, gpu]
+ timeout-minutes: 90
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: "recursive"
+
+ - uses: actions/setup-python@v2
+ with:
+ python-version: 3.7
+
+ - name: Add Visual Studio Shell to ENV
+ uses: egor-tensin/vs-shell@v2
+ with:
+ arch: x64
+
+ - name: Get sccache cache
+ uses: actions/cache@v2
+ with:
+ path: ccache_cache
+ key: ccache-win64-${{ github.sha }}
+ restore-keys: |
+ ccache-win64-
+
+ - name: Build
+ shell: powershell
+ run: |
+ if ( "${{needs.check_files.outputs.run_job}}" -eq "false" ) {
+ exit 0
+ }
+ .\.github\workflows\scripts\win_build.ps1 -installVulkan -install -libsDir C:\
+
+ - name: Test
+ shell: powershell
+ run: |
+ if ( "${{needs.check_files.outputs.run_job}}" -eq "false" ) {
+ exit 0
+ }
+ .\.github\workflows\scripts\win_test.ps1
+ env:
+ TI_WANTED_ARCHS: cpu,cuda,opengl
+ TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=ON -DTI_WITH_CC:BOOL=OFF
+ TI_SKIP_VERSION_CHECK: ON
+
+ build_and_test_m1:
+ name: Build and Test (Apple M1)
+ needs: [check_code_format, check_files]
+ timeout-minutes: 60
+ strategy:
+ matrix:
+ include:
+ - os: macos-latest
+            python: "3.10"
+ defaults:
+ run:
+ # https://github.com/actions/runner/issues/805#issuecomment-844426478
+ shell: "/usr/bin/arch -arch arm64e /bin/bash --noprofile --norc -eo pipefail {0}"
+ runs-on: [self-hosted, m1]
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: "recursive"
+
+ - name: Get sccache cache
+ uses: actions/cache@v2
+ with:
+ path: sccache_cache
+ key: sccache-m1-${{ github.sha }}
+ restore-keys: |
+ sccache-m1-
+
+ - name: Build
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+          rm -rf -- "$HOME/Library/Python/3.8/lib/python/site-packages/taichi"
+ brew install molten-vk
+ export PATH=/Users/github/miniforge3/envs/3.10/bin:$PATH
+ .github/workflows/scripts/unix_build.sh
+ env:
+ TAICHI_CMAKE_ARGS: -DTI_WITH_OPENGL:BOOL=OFF -DTI_WITH_CUDA:BOOL=OFF -DTI_WITH_CC:BOOL=OFF -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+ CXX: clang++
+
+ - name: Test
+ run: |
+ if [[ ${{needs.check_files.outputs.run_job}} == false ]]; then
+ exit 0
+ fi
+ export PATH=/Users/github/miniforge3/envs/3.10/bin:$PATH
+ .github/workflows/scripts/unix_test.sh
+ env:
+ TI_WANTED_ARCHS: "metal,vulkan,cpu"
diff --git a/mpm88_scoped_profiler.txt b/mpm88_scoped_profiler.txt
new file mode 100644
index 000000000..a4f93d600
--- /dev/null
+++ b/mpm88_scoped_profiler.txt
@@ -0,0 +1,228 @@
+[38;2;000;255;255m>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+[0m[38;2;255;000;000m[Profiler thread 139981479728064]
+[0m [38;2;255;000;000m260.178 ms compile [0m[38;2;000;255;255m [1 x 260.178 ms]
+[0m [38;2;144;238;144m212.325 ms 81.61% compile_to_executable[0m[38;2;000;255;255m [1 x 212.325 ms]
+[0m [38;2;255;255;000m163.618 ms 77.06% compile_to_offloads [0m[38;2;000;255;255m [1 x 163.618 ms]
+[0m [38;2;173;216;230m 0.012 ms 0.01% frontend_type_check [0m[38;2;000;255;255m [1 x 11.921 us]
+[0m [38;2;173;216;230m 17.289 ms 10.57% lower_ast [0m[38;2;000;255;255m [1 x 17.289 ms]
+[0m [38;2;255;000;255m 13.589 ms 78.60% replace_all_usages_with[0m[38;2;000;255;255m [1999 x 6.798 us]
+[0m [38;2;255;000;255m 3.700 ms 21.40% [unaccounted]
+[0m [38;2;173;216;230m 0.587 ms 0.36% type_check [0m[38;2;000;255;255m [2 x 293.374 us]
+[0m [38;2;173;216;230m 1.331 ms 0.81% verify [0m[38;2;000;255;255m [8 x 166.386 us]
+[0m [38;2;173;216;230m 0.309 ms 0.19% bit_loop_vectorize [0m[38;2;000;255;255m [1 x 308.990 us]
+[0m [38;2;255;000;255m306.845 us 99.31% die [0m[38;2;000;255;255m [1 x 306.845 us]
+[0m [38;2;255;000;255m 2.146 us 0.69% [unaccounted]
+[0m [38;2;173;216;230m139.275 ms 85.12% full_simplify [0m[38;2;000;255;255m [3 x 46.425 ms]
+[0m [38;2;255;000;255m 0.778 ms 0.56% extract_constant [0m[38;2;000;255;255m [7 x 111.171 us]
+[0m [38;2;255;000;255m 0.192 ms 0.14% unreachable_code_elimination[0m[38;2;000;255;255m [7 x 27.418 us]
+[0m [38;2;255;000;255m 0.308 ms 0.22% binary_op_simplify [0m[38;2;000;255;255m [7 x 44.005 us]
+[0m [38;2;255;000;255m 12.159 us 3.95% replace_all_usages_with[0m[38;2;000;255;255m [1 x 12.159 us]
+[0m [38;2;255;000;255m295.877 us 96.05% [unaccounted]
+[0m [38;2;255;000;255m 19.530 ms 14.02% constant_fold [0m[38;2;000;255;255m [7 x 2.790 ms]
+[0m [38;2;255;000;255m 0.816 ms 4.18% replace_all_usages_with[0m[38;2;000;255;255m [112 x 7.287 us]
+[0m [38;2;255;000;255m 18.307 ms 93.74% compile [0m[38;2;000;255;255m [2 x 9.153 ms]
+[0m [38;2;255;000;255m 0.073 ms 0.40% compile_to_executable[0m[38;2;000;255;255m [2 x 36.597 us]
+[0m [38;2;255;000;255m 34.094 us 46.58% compile_to_offloads [0m[38;2;000;255;255m [2 x 17.047 us]
+[0m [38;2;255;000;255m 0.954 us 2.80% frontend_type_check [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 3.338 us 9.79% lower_ast [0m[38;2;000;255;255m [2 x 1.669 us]
+[0m [38;2;255;000;255m 1.907 us 5.59% type_check [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 2.146 us 6.29% verify [0m[38;2;000;255;255m [4 x 536.442 ns]
+[0m [38;2;255;000;255m 0.954 us 2.80% demote_operations [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 21.935 us 64.34% offload [0m[38;2;000;255;255m [2 x 10.967 us]
+[0m [38;2;255;000;255m 1.192 us 5.43% type_check [0m[38;2;000;255;255m [4 x 298.023 ns]
+[0m [38;2;255;000;255m 20.742 us 94.57% [unaccounted]
+[0m [38;2;255;000;255m 2.861 us 8.39% [unaccounted]
+[0m [38;2;255;000;255m 38.147 us 52.12% offload_to_executable[0m[38;2;000;255;255m [2 x 19.073 us]
+[0m [38;2;255;000;255m 9.298 us 24.38% verify [0m[38;2;000;255;255m [20 x 464.916 ns]
+[0m [38;2;255;000;255m 1.907 us 5.00% demote_atomics [0m[38;2;000;255;255m [4 x 476.837 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% type_check [0m[38;2;000;255;255m [4 x 0.000 ns]
+[0m [38;2;255;000;255m 1.907 us 100.00% [unaccounted]
+[0m [38;2;255;000;255m 1.907 us 5.00% type_check [0m[38;2;000;255;255m [8 x 238.419 ns]
+[0m [38;2;255;000;255m 0.954 us 2.50% make_thread_local [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m953.674 ns type_check [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% make_mesh_thread_local[0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 ns type_check [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 2.146 us 5.62% demote_mesh_statements[0m[38;2;000;255;255m [2 x 1.073 us]
+[0m [38;2;255;000;255m 1.192 us 55.56% type_check [0m[38;2;000;255;255m [2 x 596.046 ns]
+[0m [38;2;255;000;255m 0.954 us 44.44% [unaccounted]
+[0m [38;2;255;000;255m 0.000 us 0.00% remove_range_assumption[0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 1.907 us 5.00% remove_loop_unique [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 1.907 us 5.00% die [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% flag_access [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% demote_operations [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 2.861 us 7.50% full_simplify [0m[38;2;000;255;255m [2 x 1.431 us]
+[0m [38;2;255;000;255m 1.907 us 66.67% simplify [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 0.954 us 33.33% die [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 3.099 us 8.12% optimize_bit_struct_stores[0m[38;2;000;255;255m [2 x 1.550 us]
+[0m [38;2;255;000;255m 1.907 us 61.54% die [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 1.192 us 38.46% [unaccounted]
+[0m [38;2;255;000;255m 12.159 us 31.88% [unaccounted]
+[0m [38;2;255;000;255m 0.954 us 1.30% [unaccounted]
+[0m [38;2;255;000;255m 18.228 ms 99.57% codegen [0m[38;2;000;255;255m [2 x 9.114 ms]
+[0m [38;2;255;000;255m 9.115 ms 50.01% clone_struct_module [0m[38;2;000;255;255m [2 x 4.557 ms]
+[0m [38;2;255;000;255m 0.001 ms 0.01% CodeGenLLVMCPU [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 0.045 ms 0.25% emit_to_module [0m[38;2;000;255;255m [2 x 22.411 us]
+[0m [38;2;255;000;255m 9.059 ms 49.70% compile_module_to_executable[0m[38;2;000;255;255m [2 x 4.530 ms]
+[0m [38;2;255;000;255m 1.679 ms 18.53% eliminate_unused_functions[0m[38;2;000;255;255m [2 x 839.472 us]
+[0m [38;2;255;000;255m 5.826 ms 64.31% global_optimize_module_cpu[0m[38;2;000;255;255m [2 x 2.913 ms]
+[0m [38;2;255;000;255m 0.780 ms 13.39% llvm_function_pass [0m[38;2;000;255;255m [2 x 389.934 us]
+[0m [38;2;255;000;255m 3.091 ms 53.06% llvm_module_pass [0m[38;2;000;255;255m [2 x 1.546 ms]
+[0m [38;2;255;000;255m 1.955 ms 33.55% [unaccounted]
+[0m [38;2;255;000;255m 1.554 ms 17.16% [unaccounted]
+[0m [38;2;255;000;255m 0.407 ms 2.08% [unaccounted]
+[0m [38;2;255;000;255m 2.801 ms 2.01% die [0m[38;2;000;255;255m [21 x 133.401 us]
+[0m [38;2;255;000;255m 0.804 ms 0.58% alg_simp [0m[38;2;000;255;255m [7 x 114.884 us]
+[0m [38;2;255;000;255m151.634 us 18.86% replace_all_usages_with[0m[38;2;000;255;255m [72 x 2.106 us]
+[0m [38;2;255;000;255m652.552 us 81.14% [unaccounted]
+[0m [38;2;255;000;255m 0.047 ms 0.03% loop_invariant_code_motion[0m[38;2;000;255;255m [7 x 6.744 us]
+[0m [38;2;255;000;255m 0.639 ms 0.46% simplify [0m[38;2;000;255;255m [7 x 91.348 us]
+[0m [38;2;255;000;255m 23.106 ms 16.59% whole_kernel_cse [0m[38;2;000;255;255m [7 x 3.301 ms]
+[0m [38;2;255;000;255m 6.803 ms 29.44% replace_all_usages_with[0m[38;2;000;255;255m [620 x 10.973 us]
+[0m [38;2;255;000;255m 16.303 ms 70.56% [unaccounted]
+[0m [38;2;255;000;255m 91.065 ms 65.38% cfg_optimization [0m[38;2;000;255;255m [5 x 18.213 ms]
+[0m [38;2;255;000;255m 63.132 ms 69.33% store_to_load_forwarding[0m[38;2;000;255;255m [6 x 10.522 ms]
+[0m [38;2;255;000;255m 30.162 ms 47.78% reaching_definition_analysis[0m[38;2;000;255;255m [6 x 5.027 ms]
+[0m [38;2;255;000;255m 11.839 ms 18.75% replace_all_usages_with[0m[38;2;000;255;255m [1283 x 9.227 us]
+[0m [38;2;255;000;255m 21.132 ms 33.47% [unaccounted]
+[0m [38;2;255;000;255m 26.704 ms 29.32% dead_store_elimination[0m[38;2;000;255;255m [6 x 4.451 ms]
+[0m [38;2;255;000;255m 18.921 ms 70.85% live_variable_analysis[0m[38;2;000;255;255m [6 x 3.153 ms]
+[0m [38;2;255;000;255m 0.103 ms 0.38% replace_all_usages_with[0m[38;2;000;255;255m [19 x 5.396 us]
+[0m [38;2;255;000;255m 7.681 ms 28.76% [unaccounted]
+[0m [38;2;255;000;255m 0.721 ms 0.79% die [0m[38;2;000;255;255m [5 x 144.196 us]
+[0m [38;2;255;000;255m 0.508 ms 0.56% [unaccounted]
+[0m [38;2;173;216;230m 0.011 ms 0.01% inlining [0m[38;2;000;255;255m [1 x 10.967 us]
+[0m [38;2;173;216;230m 0.020 ms 0.01% flag_access [0m[38;2;000;255;255m [2 x 9.894 us]
+[0m [38;2;173;216;230m 1.025 ms 0.63% offload [0m[38;2;000;255;255m [1 x 1.025 ms]
+[0m [38;2;255;000;255m 0.007 ms 0.70% replace_all_usages_with[0m[38;2;000;255;255m [4 x 1.788 us]
+[0m [38;2;255;000;255m 0.106 ms 10.33% type_check [0m[38;2;000;255;255m [2 x 52.929 us]
+[0m [38;2;255;000;255m 0.912 ms 88.97% [unaccounted]
+[0m [38;2;173;216;230m 3.751 ms 2.29% cfg_optimization [0m[38;2;000;255;255m [1 x 3.751 ms]
+[0m [38;2;255;000;255m 2.075 ms 55.32% store_to_load_forwarding[0m[38;2;000;255;255m [1 x 2.075 ms]
+[0m [38;2;255;000;255m 1.174 ms 56.57% reaching_definition_analysis[0m[38;2;000;255;255m [1 x 1.174 ms]
+[0m [38;2;255;000;255m 0.901 ms 43.43% [unaccounted]
+[0m [38;2;255;000;255m 1.476 ms 39.34% dead_store_elimination[0m[38;2;000;255;255m [1 x 1.476 ms]
+[0m [38;2;255;000;255m 0.945 ms 64.02% live_variable_analysis[0m[38;2;000;255;255m [1 x 944.853 us]
+[0m [38;2;255;000;255m 0.531 ms 35.98% [unaccounted]
+[0m [38;2;255;000;255m 0.147 ms 3.92% die [0m[38;2;000;255;255m [1 x 147.104 us]
+[0m [38;2;255;000;255m 0.053 ms 1.41% [unaccounted]
+[0m [38;2;255;255;000m 48.706 ms 22.94% offload_to_executable[0m[38;2;000;255;255m [1 x 48.706 ms]
+[0m [38;2;173;216;230m 1.132 ms 2.33% verify [0m[38;2;000;255;255m [10 x 113.249 us]
+[0m [38;2;173;216;230m 0.334 ms 0.69% demote_atomics [0m[38;2;000;255;255m [2 x 166.893 us]
+[0m [38;2;255;000;255m103.951 us 31.14% replace_all_usages_with[0m[38;2;000;255;255m [57 x 1.824 us]
+[0m [38;2;255;000;255m 66.042 us 19.79% type_check [0m[38;2;000;255;255m [2 x 33.021 us]
+[0m [38;2;255;000;255m163.794 us 49.07% [unaccounted]
+[0m [38;2;173;216;230m 0.007 ms 0.01% replace_all_usages_with[0m[38;2;000;255;255m [6 x 1.113 us]
+[0m [38;2;173;216;230m 0.141 ms 0.29% type_check [0m[38;2;000;255;255m [4 x 35.167 us]
+[0m [38;2;173;216;230m 1.043 ms 2.14% make_thread_local [0m[38;2;000;255;255m [1 x 1.043 ms]
+[0m [38;2;255;000;255m 0.033 ms 3.16% type_check [0m[38;2;000;255;255m [1 x 32.902 us]
+[0m [38;2;255;000;255m 1.010 ms 96.84% [unaccounted]
+[0m [38;2;173;216;230m 0.033 ms 0.07% make_mesh_thread_local[0m[38;2;000;255;255m [1 x 33.140 us]
+[0m [38;2;255;000;255m 33.140 us 100.00% type_check [0m[38;2;000;255;255m [1 x 33.140 us]
+[0m [38;2;173;216;230m 0.037 ms 0.08% demote_mesh_statements[0m[38;2;000;255;255m [1 x 36.955 us]
+[0m [38;2;255;000;255m 31.948 us 86.45% type_check [0m[38;2;000;255;255m [1 x 31.948 us]
+[0m [38;2;255;000;255m 5.007 us 13.55% [unaccounted]
+[0m [38;2;173;216;230m 0.006 ms 0.01% remove_range_assumption[0m[38;2;000;255;255m [1 x 5.960 us]
+[0m [38;2;173;216;230m 0.004 ms 0.01% remove_loop_unique [0m[38;2;000;255;255m [1 x 4.053 us]
+[0m [38;2;173;216;230m 0.351 ms 0.72% die [0m[38;2;000;255;255m [1 x 350.952 us]
+[0m [38;2;173;216;230m 0.016 ms 0.03% flag_access [0m[38;2;000;255;255m [1 x 15.974 us]
+[0m [38;2;173;216;230m 0.877 ms 1.80% demote_operations [0m[38;2;000;255;255m [1 x 877.142 us]
+[0m [38;2;255;000;255m432.014 us 49.25% replace_all_usages_with[0m[38;2;000;255;255m [157 x 2.752 us]
+[0m [38;2;255;000;255m111.103 us 12.67% type_check [0m[38;2;000;255;255m [2 x 55.552 us]
+[0m [38;2;255;000;255m334.024 us 38.08% [unaccounted]
+[0m [38;2;173;216;230m 44.051 ms 90.44% full_simplify [0m[38;2;000;255;255m [1 x 44.051 ms]
+[0m [38;2;255;000;255m 0.057 ms 0.13% extract_constant [0m[38;2;000;255;255m [3 x 18.994 us]
+[0m [38;2;255;000;255m 0.086 ms 0.19% unreachable_code_elimination[0m[38;2;000;255;255m [3 x 28.610 us]
+[0m [38;2;255;000;255m 0.189 ms 0.43% binary_op_simplify [0m[38;2;000;255;255m [3 x 63.022 us]
+[0m [38;2;255;000;255m 6.914 us 3.66% replace_all_usages_with[0m[38;2;000;255;255m [6 x 1.152 us]
+[0m [38;2;255;000;255m182.152 us 96.34% [unaccounted]
+[0m [38;2;255;000;255m 8.466 ms 19.22% constant_fold [0m[38;2;000;255;255m [3 x 2.822 ms]
+[0m [38;2;255;000;255m 8.332 ms 98.42% compile [0m[38;2;000;255;255m [1 x 8.332 ms]
+[0m [38;2;255;000;255m 0.032 ms 0.38% compile_to_executable[0m[38;2;000;255;255m [1 x 31.948 us]
+[0m [38;2;255;000;255m 14.067 us 44.03% compile_to_offloads [0m[38;2;000;255;255m [1 x 14.067 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% frontend_type_check [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 1.192 us 8.47% lower_ast [0m[38;2;000;255;255m [1 x 1.192 us]
+[0m [38;2;255;000;255m 0.954 us 6.78% type_check [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 1.907 us 13.56% verify [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% demote_operations [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 8.106 us 57.63% offload [0m[38;2;000;255;255m [1 x 8.106 us]
+[0m [38;2;255;000;255m 0.954 us 11.76% type_check [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 7.153 us 88.24% [unaccounted]
+[0m [38;2;255;000;255m 1.907 us 13.56% [unaccounted]
+[0m [38;2;255;000;255m 17.881 us 55.97% offload_to_executable[0m[38;2;000;255;255m [1 x 17.881 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% verify [0m[38;2;000;255;255m [10 x 0.000 ns]
+[0m [38;2;255;000;255m 2.146 us 12.00% demote_atomics [0m[38;2;000;255;255m [2 x 1.073 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% type_check [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 2.146 us 100.00% [unaccounted]
+[0m [38;2;255;000;255m 2.861 us 16.00% type_check [0m[38;2;000;255;255m [4 x 715.256 ns]
+[0m [38;2;255;000;255m 0.954 us 5.33% make_thread_local [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m953.674 ns type_check [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% make_mesh_thread_local[0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 ns type_check [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.954 us 5.33% demote_mesh_statements[0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 ns type_check [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% remove_range_assumption[0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.954 us 5.33% remove_loop_unique [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.954 us 5.33% die [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.954 us 5.33% flag_access [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 1.192 us 6.67% demote_operations [0m[38;2;000;255;255m [1 x 1.192 us]
+[0m [38;2;255;000;255m 0.954 us 5.33% full_simplify [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 ns simplify [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m953.674 ns die [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 1.907 us 10.67% optimize_bit_struct_stores[0m[38;2;000;255;255m [1 x 1.907 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% die [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 1.907 us 100.00% [unaccounted]
+[0m [38;2;255;000;255m 4.053 us 22.67% [unaccounted]
+[0m [38;2;255;000;255m 8.297 ms 99.58% codegen [0m[38;2;000;255;255m [1 x 8.297 ms]
+[0m [38;2;255;000;255m 3.498 ms 42.16% clone_struct_module [0m[38;2;000;255;255m [1 x 3.498 ms]
+[0m [38;2;255;000;255m 0.000 ms 0.00% CodeGenLLVMCPU [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.026 ms 0.31% emit_to_module [0m[38;2;000;255;255m [1 x 25.988 us]
+[0m [38;2;255;000;255m 4.769 ms 57.48% compile_module_to_executable[0m[38;2;000;255;255m [1 x 4.769 ms]
+[0m [38;2;255;000;255m 0.907 ms 19.02% eliminate_unused_functions[0m[38;2;000;255;255m [1 x 906.944 us]
+[0m [38;2;255;000;255m 3.066 ms 64.29% global_optimize_module_cpu[0m[38;2;000;255;255m [1 x 3.066 ms]
+[0m [38;2;255;000;255m 0.403 ms 13.15% llvm_function_pass [0m[38;2;000;255;255m [1 x 403.166 us]
+[0m [38;2;255;000;255m 1.645 ms 53.65% llvm_module_pass [0m[38;2;000;255;255m [1 x 1.645 ms]
+[0m [38;2;255;000;255m 1.018 ms 33.20% [unaccounted]
+[0m [38;2;255;000;255m 0.796 ms 16.70% [unaccounted]
+[0m [38;2;255;000;255m 0.019 ms 0.23% replace_all_usages_with[0m[38;2;000;255;255m [6 x 3.179 us]
+[0m [38;2;255;000;255m 0.115 ms 1.36% [unaccounted]
+[0m [38;2;255;000;255m 1.502 ms 3.41% die [0m[38;2;000;255;255m [9 x 166.919 us]
+[0m [38;2;255;000;255m 1.093 ms 2.48% alg_simp [0m[38;2;000;255;255m [3 x 364.383 us]
+[0m [38;2;255;000;255m 0.603 ms 55.16% replace_all_usages_with[0m[38;2;000;255;255m [201 x 3.000 us]
+[0m [38;2;255;000;255m 0.490 ms 44.84% [unaccounted]
+[0m [38;2;255;000;255m 0.021 ms 0.05% loop_invariant_code_motion[0m[38;2;000;255;255m [3 x 6.994 us]
+[0m [38;2;255;000;255m 4.764 ms 10.81% simplify [0m[38;2;000;255;255m [3 x 1.588 ms]
+[0m [38;2;255;000;255m 1.228 ms 25.77% replace_all_usages_with[0m[38;2;000;255;255m [414 x 2.965 us]
+[0m [38;2;255;000;255m 2.598 ms 54.53% type_check [0m[38;2;000;255;255m [174 x 14.929 us]
+[0m [38;2;255;000;255m 0.939 ms 19.70% [unaccounted]
+[0m [38;2;255;000;255m 25.229 ms 57.27% whole_kernel_cse [0m[38;2;000;255;255m [3 x 8.410 ms]
+[0m [38;2;255;000;255m 4.337 ms 17.19% replace_all_usages_with[0m[38;2;000;255;255m [1165 x 3.722 us]
+[0m [38;2;255;000;255m 20.892 ms 82.81% [unaccounted]
+[0m [38;2;255;000;255m 2.643 ms 6.00% cfg_optimization [Taichi] version 0.9.0, llvm 10.0.0, commit dd7654ae, linux, python 3.8.12
+[Taichi] Starting on arch=x64
+[0m[38;2;000;255;255m [2 x 1.322 ms]
+[0m [38;2;255;000;255m 0.964 ms 36.46% store_to_load_forwarding[0m[38;2;000;255;255m [3 x 321.229 us]
+[0m [38;2;255;000;255m262.737 us 27.26% reaching_definition_analysis[0m[38;2;000;255;255m [3 x 87.579 us]
+[0m [38;2;255;000;255m163.317 us 16.95% replace_all_usages_with[0m[38;2;000;255;255m [60 x 2.722 us]
+[0m [38;2;255;000;255m537.634 us 55.79% [unaccounted]
+[0m [38;2;255;000;255m 1.354 ms 51.24% dead_store_elimination[0m[38;2;000;255;255m [3 x 451.406 us]
+[0m [38;2;255;000;255m 0.658 ms 48.59% live_variable_analysis[0m[38;2;000;255;255m [3 x 219.345 us]
+[0m [38;2;255;000;255m 0.696 ms 51.41% [unaccounted]
+[0m [38;2;255;000;255m 0.294 ms 11.12% die [0m[38;2;000;255;255m [2 x 146.985 us]
+[0m [38;2;255;000;255m 0.031 ms 1.18% [unaccounted]
+[0m [38;2;173;216;230m 0.131 ms 0.27% optimize_bit_struct_stores[0m[38;2;000;255;255m [1 x 130.892 us]
+[0m [38;2;255;000;255m 77.009 us 58.83% die [0m[38;2;000;255;255m [1 x 77.009 us]
+[0m [38;2;255;000;255m 53.883 us 41.17% [unaccounted]
+[0m [38;2;173;216;230m 0.544 ms 1.12% [unaccounted]
+[0m [38;2;144;238;144m 47.835 ms 18.39% codegen [0m[38;2;000;255;255m [1 x 47.835 ms]
+[0m [38;2;255;255;000m 4.241 ms 8.87% clone_struct_module [0m[38;2;000;255;255m [1 x 4.241 ms]
+[0m [38;2;255;255;000m 0.000 ms 0.00% CodeGenLLVMCPU [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;255;000m 0.408 ms 0.85% emit_to_module [0m[38;2;000;255;255m [1 x 408.173 us]
+[0m [38;2;255;255;000m 43.178 ms 90.26% compile_module_to_executable[0m[38;2;000;255;255m [1 x 43.178 ms]
+[0m [38;2;173;216;230m 0.809 ms 1.87% eliminate_unused_functions[0m[38;2;000;255;255m [1 x 808.954 us]
+[0m [38;2;173;216;230m 35.383 ms 81.95% global_optimize_module_cpu[0m[38;2;000;255;255m [1 x 35.383 ms]
+[0m [38;2;255;000;255m 1.499 ms 4.24% llvm_function_pass [0m[38;2;000;255;255m [1 x 1.499 ms]
+[0m [38;2;255;000;255m 32.613 ms 92.17% llvm_module_pass [0m[38;2;000;255;255m [1 x 32.613 ms]
+[0m [38;2;255;000;255m 1.271 ms 3.59% [unaccounted]
+[0m [38;2;173;216;230m 6.986 ms 16.18% [unaccounted]
+[0m[38;2;000;255;255m>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+[0m
\ No newline at end of file
diff --git a/perf.data b/perf.data
new file mode 100644
index 000000000..616f13b37
Binary files /dev/null and b/perf.data differ
diff --git a/scoped_profile_full.txt b/scoped_profile_full.txt
new file mode 100644
index 000000000..10fbea513
--- /dev/null
+++ b/scoped_profile_full.txt
@@ -0,0 +1,244 @@
+[38;2;000;255;255m>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+[0m[38;2;255;000;000m[Profiler thread 140647799088064]
+[0m [38;2;255;000;000m 17.142 ms clone_runtime_module [0m[38;2;000;255;255m [2 x 8.571 ms]
+[0m [38;2;144;238;144m 9.855 ms 57.49% module_from_bitcode_file[0m[38;2;000;255;255m [1 x 9.855 ms]
+[0m [38;2;144;238;144m 7.262 ms 42.36% clone module [0m[38;2;000;255;255m [2 x 3.631 ms]
+[0m [38;2;255;000;000m651.836 us eliminate_unused_functions [0m[38;2;000;255;255m [1 x 651.836 us]
+[0m [38;2;255;000;000m 81.514 ms global_optimize_module_cpu [0m[38;2;000;255;255m [1 x 81.514 ms]
+[0m [38;2;144;238;144m 1.975 ms 2.42% llvm_function_pass [0m[38;2;000;255;255m [1 x 1.975 ms]
+[0m [38;2;144;238;144m 78.158 ms 95.88% llvm_module_pass [0m[38;2;000;255;255m [1 x 78.158 ms]
+[0m [38;2;144;238;144m 1.381 ms 1.69% [unaccounted]
+[0m [38;2;255;000;000m 5.082 ms run [0m[38;2;000;255;255m [1 x 5.082 ms]
+[0m [38;2;144;238;144m 0.036 ms 0.71% generate_types [0m[38;2;000;255;255m [19 x 1.895 us]
+[0m [38;2;144;238;144m 0.067 ms 1.32% generate_child_accessors[0m[38;2;000;255;255m [1 x 66.996 us]
+[0m [38;2;255;255;000m 10.014 us 14.95% generate_refine_coordinates[0m[38;2;000;255;255m [1 x 10.014 us]
+[0m [38;2;255;255;000m 56.982 us 85.05% generate_child_accessors[0m[38;2;000;255;255m [6 x 9.497 us]
+[0m [38;2;173;216;230m 31.948 us 56.07% generate_refine_coordinates[0m[38;2;000;255;255m [6 x 5.325 us]
+[0m [38;2;173;216;230m 15.736 us 27.62% generate_child_accessors[0m[38;2;000;255;255m [12 x 1.311 us]
+[0m [38;2;173;216;230m 9.298 us 16.32% [unaccounted]
+[0m [38;2;144;238;144m 4.979 ms 97.97% [unaccounted]
+[0m [38;2;255;000;000m290.610 ms compile [0m[38;2;000;255;255m [2 x 145.305 ms]
+[0m [38;2;144;238;144m218.299 ms 75.12% compile_to_executable[0m[38;2;000;255;255m [2 x 109.149 ms]
+[0m [38;2;255;255;000m169.878 ms 77.82% compile_to_offloads [0m[38;2;000;255;255m [2 x 84.939 ms]
+[0m [38;2;173;216;230m 0.014 ms 0.01% frontend_type_check [0m[38;2;000;255;255m [2 x 6.914 us]
+[0m [38;2;173;216;230m 18.433 ms 10.85% lower_ast [0m[38;2;000;255;255m [2 x 9.217 ms]
+[0m [38;2;255;000;255m 14.735 ms 79.94% replace_all_usages_with[0m[38;2;000;255;255m [2017 x 7.306 us]
+[0m [38;2;255;000;255m 3.698 ms 20.06% [unaccounted]
+[0m [38;2;173;216;230m 0.614 ms 0.36% type_check [0m[38;2;000;255;255m [4 x 153.482 us]
+[0m [38;2;173;216;230m 1.307 ms 0.77% verify [0m[38;2;000;255;255m [16 x 81.718 us]
+[0m [38;2;173;216;230m 0.327 ms 0.19% bit_loop_vectorize [0m[38;2;000;255;255m [2 x 163.555 us]
+[0m [38;2;255;000;255m324.965 us 99.34% die [0m[38;2;000;255;255m [2 x 162.482 us]
+[0m [38;2;255;000;255m 2.146 us 0.66% [unaccounted]
+[0m [38;2;173;216;230m144.166 ms 84.86% full_simplify [0m[38;2;000;255;255m [6 x 24.028 ms]
+[0m [38;2;255;000;255m 0.782 ms 0.54% extract_constant [0m[38;2;000;255;255m [14 x 55.875 us]
+[0m [38;2;255;000;255m 0.200 ms 0.14% unreachable_code_elimination[0m[38;2;000;255;255m [14 x 14.305 us]
+[0m [38;2;255;000;255m 0.314 ms 0.22% binary_op_simplify [0m[38;2;000;255;255m [14 x 22.462 us]
+[0m [38;2;255;000;255m 14.067 us 4.47% replace_all_usages_with[0m[38;2;000;255;255m [1 x 14.067 us]
+[0m [38;2;255;000;255m300.407 us 95.53% [unaccounted]
+[0m [38;2;255;000;255m 21.322 ms 14.79% constant_fold [0m[38;2;000;255;255m [14 x 1.523 ms]
+[0m [38;2;255;000;255m 0.889 ms 4.17% replace_all_usages_with[0m[38;2;000;255;255m [117 x 7.595 us]
+[0m [38;2;255;000;255m 20.008 ms 93.84% compile [0m[38;2;000;255;255m [2 x 10.004 ms]
+[0m [38;2;255;000;255m 0.075 ms 0.37% compile_to_executable[0m[38;2;000;255;255m [2 x 37.432 us]
+[0m [38;2;255;000;255m 36.955 us 49.36% compile_to_offloads [0m[38;2;000;255;255m [2 x 18.477 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% frontend_type_check [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 2.861 us 7.74% lower_ast [0m[38;2;000;255;255m [2 x 1.431 us]
+[0m [38;2;255;000;255m 0.954 us 2.58% type_check [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 4.053 us 10.97% verify [0m[38;2;000;255;255m [4 x 1.013 us]
+[0m [38;2;255;000;255m 1.907 us 5.16% demote_operations [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 20.981 us 56.77% offload [0m[38;2;000;255;255m [2 x 10.490 us]
+[0m [38;2;255;000;255m 1.907 us 9.09% type_check [0m[38;2;000;255;255m [4 x 476.837 ns]
+[0m [38;2;255;000;255m 19.073 us 90.91% [unaccounted]
+[0m [38;2;255;000;255m 6.199 us 16.77% [unaccounted]
+[0m [38;2;255;000;255m 36.955 us 49.36% offload_to_executable[0m[38;2;000;255;255m [2 x 18.477 us]
+[0m [38;2;255;000;255m 3.815 us 10.32% verify [0m[38;2;000;255;255m [20 x 190.735 ns]
+[0m [38;2;255;000;255m 3.338 us 9.03% demote_atomics [0m[38;2;000;255;255m [4 x 834.465 ns]
+[0m [38;2;255;000;255m 2.146 us 64.29% type_check [0m[38;2;000;255;255m [4 x 536.442 ns]
+[0m [38;2;255;000;255m 1.192 us 35.71% [unaccounted]
+[0m [38;2;255;000;255m 1.907 us 5.16% type_check [0m[38;2;000;255;255m [8 x 238.419 ns]
+[0m [38;2;255;000;255m 0.954 us 2.58% make_thread_local [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m953.674 ns type_check [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% make_mesh_thread_local[0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 ns type_check [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 0.954 us 2.58% demote_mesh_statements[0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m953.674 ns type_check [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 0.954 us 2.58% remove_range_assumption[0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 1.192 us 3.23% remove_loop_unique [0m[38;2;000;255;255m [2 x 596.046 ns]
+[0m [38;2;255;000;255m 1.907 us 5.16% die [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 2.146 us 5.81% flag_access [0m[38;2;000;255;255m [2 x 1.073 us]
+[0m [38;2;255;000;255m 0.954 us 2.58% demote_operations [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 3.099 us 8.39% full_simplify [0m[38;2;000;255;255m [2 x 1.550 us]
+[0m [38;2;255;000;255m 1.192 us 38.46% simplify [0m[38;2;000;255;255m [2 x 596.046 ns]
+[0m [38;2;255;000;255m 0.954 us 30.77% die [0m[38;2;000;255;255m [2 x 476.837 ns]
+[0m [38;2;255;000;255m 0.954 us 30.77% [unaccounted]
+[0m [38;2;255;000;255m 2.861 us 7.74% optimize_bit_struct_stores[0m[38;2;000;255;255m [2 x 1.431 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% die [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 2.861 us 100.00% [unaccounted]
+[0m [38;2;255;000;255m 12.875 us 34.84% [unaccounted]
+[0m [38;2;255;000;255m 0.954 us 1.27% [unaccounted]
+[0m [38;2;255;000;255m 19.926 ms 99.59% codegen [0m[38;2;000;255;255m [2 x 9.963 ms]
+[0m [38;2;255;000;255m 10.840 ms 54.40% clone_struct_module [0m[38;2;000;255;255m [2 x 5.420 ms]
+[0m [38;2;255;000;255m 0.000 ms 0.00% CodeGenLLVMCPU [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 0.046 ms 0.23% emit_to_module [0m[38;2;000;255;255m [2 x 23.007 us]
+[0m [38;2;255;000;255m 9.030 ms 45.32% compile_module_to_executable[0m[38;2;000;255;255m [2 x 4.515 ms]
+[0m [38;2;255;000;255m 1.717 ms 19.02% eliminate_unused_functions[0m[38;2;000;255;255m [2 x 858.545 us]
+[0m [38;2;255;000;255m 5.776 ms 63.96% global_optimize_module_cpu[0m[38;2;000;255;255m [2 x 2.888 ms]
+[0m [38;2;255;000;255m 0.777 ms 13.46% llvm_function_pass [0m[38;2;000;255;255m [2 x 388.622 us]
+[0m [38;2;255;000;255m 3.068 ms 53.11% llvm_module_pass [0m[38;2;000;255;255m [2 x 1.534 ms]
+[0m [38;2;255;000;255m 1.931 ms 33.43% [unaccounted]
+[0m [38;2;255;000;255m 1.537 ms 17.02% [unaccounted]
+[0m [38;2;255;000;255m 0.426 ms 2.00% [unaccounted]
+[0m [38;2;255;000;255m 2.889 ms 2.00% die [0m[38;2;000;255;255m [42 x 68.784 us]
+[0m [38;2;255;000;255m 0.797 ms 0.55% alg_simp [0m[38;2;000;255;255m [14 x 56.948 us]
+[0m [38;2;255;000;255m134.230 us 16.84% replace_all_usages_with[0m[38;2;000;255;255m [72 x 1.864 us]
+[0m [38;2;255;000;255m663.042 us 83.16% [unaccounted]
+[0m [38;2;255;000;255m 0.047 ms 0.03% loop_invariant_code_motion[0m[38;2;000;255;255m [14 x 3.338 us]
+[0m [38;2;255;000;255m 0.621 ms 0.43% simplify [0m[38;2;000;255;255m [14 x 44.363 us]
+[0m [38;2;255;000;255m 24.378 ms 16.91% whole_kernel_cse [0m[38;2;000;255;255m [14 x 1.741 ms]
+[0m [38;2;255;000;255m 7.641 ms 31.35% replace_all_usages_with[0m[38;2;000;255;255m [625 x 12.226 us]
+[0m [38;2;255;000;255m 16.737 ms 68.65% [unaccounted]
+[0m [38;2;255;000;255m 92.805 ms 64.37% cfg_optimization [0m[38;2;000;255;255m [10 x 9.280 ms]
+[0m [38;2;255;000;255m 64.329 ms 69.32% store_to_load_forwarding[0m[38;2;000;255;255m [12 x 5.361 ms]
+[0m [38;2;255;000;255m 30.363 ms 47.20% reaching_definition_analysis[0m[38;2;000;255;255m [12 x 2.530 ms]
+[0m [38;2;255;000;255m 12.398 ms 19.27% replace_all_usages_with[0m[38;2;000;255;255m [1289 x 9.619 us]
+[0m [38;2;255;000;255m 21.568 ms 33.53% [unaccounted]
+[0m [38;2;255;000;255m 27.198 ms 29.31% dead_store_elimination[0m[38;2;000;255;255m [12 x 2.267 ms]
+[0m [38;2;255;000;255m 19.209 ms 70.63% live_variable_analysis[0m[38;2;000;255;255m [12 x 1.601 ms]
+[0m [38;2;255;000;255m 0.115 ms 0.42% replace_all_usages_with[0m[38;2;000;255;255m [19 x 6.036 us]
+[0m [38;2;255;000;255m 7.874 ms 28.95% [unaccounted]
+[0m [38;2;255;000;255m 0.754 ms 0.81% die [0m[38;2;000;255;255m [10 x 75.364 us]
+[0m [38;2;255;000;255m 0.524 ms 0.56% [unaccounted]
+[0m [38;2;173;216;230m 0.010 ms 0.01% inlining [0m[38;2;000;255;255m [2 x 5.007 us]
+[0m [38;2;173;216;230m 0.020 ms 0.01% flag_access [0m[38;2;000;255;255m [4 x 5.007 us]
+[0m [38;2;173;216;230m 1.093 ms 0.64% offload [0m[38;2;000;255;255m [2 x 546.575 us]
+[0m [38;2;255;000;255m 0.006 ms 0.52% replace_all_usages_with[0m[38;2;000;255;255m [5 x 1.144 us]
+[0m [38;2;255;000;255m 0.109 ms 9.95% type_check [0m[38;2;000;255;255m [4 x 27.180 us]
+[0m [38;2;255;000;255m 0.979 ms 89.53% [unaccounted]
+[0m [38;2;173;216;230m 3.873 ms 2.28% cfg_optimization [0m[38;2;000;255;255m [2 x 1.937 ms]
+[0m [38;2;255;000;255m 2.122 ms 54.79% store_to_load_forwarding[0m[38;2;000;255;255m [2 x 1.061 ms]
+[0m [38;2;255;000;255m 1.180 ms 55.61% reaching_definition_analysis[0m[38;2;000;255;255m [2 x 589.967 us]
+[0m [38;2;255;000;255m 0.942 ms 44.39% [unaccounted]
+[0m [38;2;255;000;255m 1.531 ms 39.53% dead_store_elimination[0m[38;2;000;255;255m [2 x 765.562 us]
+[0m [38;2;255;000;255m 0.974 ms 63.63% live_variable_analysis[0m[38;2;000;255;255m [2 x 487.089 us]
+[0m [38;2;255;000;255m 0.557 ms 36.37% [unaccounted]
+[0m [38;2;255;000;255m 0.150 ms 3.87% die [0m[38;2;000;255;255m [2 x 74.983 us]
+[0m [38;2;255;000;255m 0.070 ms 1.81% [unaccounted]
+[0m [38;2;255;255;000m 48.419 ms 22.18% offload_to_executable[0m[38;2;000;255;255m [2 x 24.209 ms]
+[0m [38;2;173;216;230m 1.081 ms 2.23% verify [0m[38;2;000;255;255m [20 x 54.073 us]
+[0m [38;2;173;216;230m 0.330 ms 0.68% demote_atomics [0m[38;2;000;255;255m [4 x 82.552 us]
+[0m [38;2;255;000;255m 68.188 us 20.65% type_check [0m[38;2;000;255;255m [4 x 17.047 us]
+[0m [38;2;255;000;255m100.136 us 30.32% replace_all_usages_with[0m[38;2;000;255;255m [57 x 1.757 us]
+[0m [38;2;255;000;255m161.886 us 49.03% [unaccounted]
+[0m [38;2;173;216;230m 0.148 ms 0.31% type_check [0m[38;2;000;255;255m [8 x 18.477 us]
+[0m [38;2;173;216;230m 1.046 ms 2.16% make_thread_local [0m[38;2;000;255;255m [2 x 522.971 us]
+[0m [38;2;255;000;255m 0.033 ms 3.15% type_check [0m[38;2;000;255;255m [2 x 16.451 us]
+[0m [38;2;255;000;255m 1.013 ms 96.85% [unaccounted]
+[0m [38;2;173;216;230m 0.032 ms 0.07% make_mesh_thread_local[0m[38;2;000;255;255m [2 x 16.093 us]
+[0m [38;2;255;000;255m 32.187 us 100.00% type_check [0m[38;2;000;255;255m [2 x 16.093 us]
+[0m [38;2;173;216;230m 0.037 ms 0.08% demote_mesh_statements[0m[38;2;000;255;255m [2 x 18.477 us]
+[0m [38;2;255;000;255m 30.994 us 83.87% type_check [0m[38;2;000;255;255m [2 x 15.497 us]
+[0m [38;2;255;000;255m 5.960 us 16.13% [unaccounted]
+[0m [38;2;173;216;230m 0.004 ms 0.01% remove_range_assumption[0m[38;2;000;255;255m [2 x 2.027 us]
+[0m [38;2;173;216;230m 0.003 ms 0.01% remove_loop_unique [0m[38;2;000;255;255m [2 x 1.550 us]
+[0m [38;2;173;216;230m 0.351 ms 0.72% die [0m[38;2;000;255;255m [2 x 175.476 us]
+[0m [38;2;173;216;230m 0.016 ms 0.03% flag_access [0m[38;2;000;255;255m [2 x 7.987 us]
+[0m [38;2;173;216;230m 0.879 ms 1.82% demote_operations [0m[38;2;000;255;255m [2 x 439.525 us]
+[0m [38;2;255;000;255m436.306 us 49.63% replace_all_usages_with[0m[38;2;000;255;255m [162 x 2.693 us]
+[0m [38;2;255;000;255m109.196 us 12.42% type_check [0m[38;2;000;255;255m [4 x 27.299 us]
+[0m [38;2;255;000;255m333.548 us 37.94% [unaccounted]
+[0m [38;2;173;216;230m 43.794 ms 90.45% full_simplify [0m[38;2;000;255;255m [2 x 21.897 ms]
+[0m [38;2;255;000;255m 0.055 ms 0.13% extract_constant [0m[38;2;000;255;255m [6 x 9.179 us]
+[0m [38;2;255;000;255m 0.088 ms 0.20% unreachable_code_elimination[0m[38;2;000;255;255m [6 x 14.623 us]
+[0m [38;2;255;000;255m 0.189 ms 0.43% binary_op_simplify [0m[38;2;000;255;255m [6 x 31.551 us]
+[0m [38;2;255;000;255m 6.437 us 3.40% replace_all_usages_with[0m[38;2;000;255;255m [6 x 1.073 us]
+[0m [38;2;255;000;255m182.867 us 96.60% [unaccounted]
+[0m [38;2;255;000;255m 8.382 ms 19.14% constant_fold [0m[38;2;000;255;255m [6 x 1.397 ms]
+[0m [38;2;255;000;255m 8.245 ms 98.37% compile [0m[38;2;000;255;255m [1 x 8.245 ms]
+[0m [38;2;255;000;255m 0.031 ms 0.38% compile_to_executable[0m[38;2;000;255;255m [1 x 30.994 us]
+[0m [38;2;255;000;255m 14.067 us 45.38% compile_to_offloads [0m[38;2;000;255;255m [1 x 14.067 us]
+[0m [38;2;255;000;255m 0.954 us 6.78% frontend_type_check [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.954 us 6.78% lower_ast [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% type_check [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 3.338 us 23.73% verify [0m[38;2;000;255;255m [2 x 1.669 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% demote_operations [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 7.868 us 55.93% offload [0m[38;2;000;255;255m [1 x 7.868 us]
+[0m [38;2;255;000;255m 1.192 us 15.15% type_check [0m[38;2;000;255;255m [2 x 596.046 ns]
+[0m [38;2;255;000;255m 6.676 us 84.85% [unaccounted]
+[0m [38;2;255;000;255m 0.954 us 6.78% [unaccounted]
+[0m [38;2;255;000;255m 16.928 us 54.62% offload_to_executable[0m[38;2;000;255;255m [1 x 16.928 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% verify [0m[38;2;000;255;255m [10 x 0.000 ns]
+[0m [38;2;255;000;255m 1.907 us 11.27% demote_atomics [0m[38;2;000;255;255m [2 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% type_check [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;000;255m 1.907 us 100.00% [unaccounted]
+[0m [38;2;255;000;255m 2.146 us 12.68% type_check [0m[38;2;000;255;255m [4 x 536.442 ns]
+[0m [38;2;255;000;255m 0.954 us 5.63% make_thread_local [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m953.674 ns type_check [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% make_mesh_thread_local[0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 ns type_check [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% demote_mesh_statements[0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 ns type_check [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% remove_range_assumption[0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 1.192 us 7.04% remove_loop_unique [0m[38;2;000;255;255m [1 x 1.192 us]
+[0m [38;2;255;000;255m 0.954 us 5.63% die [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.954 us 5.63% flag_access [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 0.000 us 0.00% demote_operations [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 1.192 us 7.04% full_simplify [0m[38;2;000;255;255m [1 x 1.192 us]
+[0m [38;2;255;000;255m 0.000 us 0.00% simplify [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 1.192 us 100.00% die [0m[38;2;000;255;255m [1 x 1.192 us]
+[0m [38;2;255;000;255m 0.954 us 5.63% optimize_bit_struct_stores[0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m953.674 ns die [0m[38;2;000;255;255m [1 x 953.674 ns]
+[0m [38;2;255;000;255m 6.676 us 39.44% [unaccounted]
+[0m [38;2;255;000;255m 8.210 ms 99.58% codegen [0m[38;2;000;255;255m [1 x 8.210 ms]
+[0m [38;2;255;000;255m 3.573 ms 43.52% clone_struct_module [0m[38;2;000;255;255m [1 x 3.573 ms]
+[0m [38;2;255;000;255m 0.000 ms 0.00% CodeGenLLVMCPU [0m[38;2;000;255;255m [1 x 0.000 ns]
+[0m [38;2;255;000;255m 0.023 ms 0.28% emit_to_module [0m[38;2;000;255;255m [1 x 23.127 us]
+[0m [38;2;255;000;255m 4.611 ms 56.16% compile_module_to_executable[0m[38;2;000;255;255m [1 x 4.611 ms]
+[0m [38;2;255;000;255m 0.863 ms 18.71% eliminate_unused_functions[0m[38;2;000;255;255m [1 x 862.837 us]
+[0m [38;2;255;000;255m 2.964 ms 64.28% global_optimize_module_cpu[0m[38;2;000;255;255m [1 x 2.964 ms]
+[0m [38;2;255;000;255m 0.396 ms 13.36% llvm_function_pass [0m[38;2;000;255;255m [1 x 396.013 us]
+[0m [38;2;255;000;255m 1.584 ms 53.44% llvm_module_pass [0m[38;2;000;255;255m [1 x 1.584 ms]
+[0m [38;2;255;000;255m 0.984 ms 33.20% [unaccounted]
+[0m [38;2;255;000;255m 0.784 ms 17.01% [unaccounted]
+[0m [38;2;255;000;255m 0.020 ms 0.24% replace_all_usages_with[0m[38;2;000;255;255m [6 x 3.378 us]
+[0m [38;2;255;000;255m 0.117 ms 1.39% [unaccounted]
+[0m [38;2;255;000;255m 1.547 ms 3.53% die [0m[38;2;000;255;255m [18 x 85.950 us]
+[0m [38;2;255;000;255m 1.079 ms 2.46% alg_simp [0m[38;2;000;255;255m [6 x 179.887 us]
+[0m [38;2;255;000;255m 0.586 ms 54.27% replace_all_usages_with[0m[38;2;000;255;255m [208 x 2.816 us]
+[0m [38;2;255;000;255m 0.494 ms 45.73% [unaccounted]
+[0m [38;2;255;000;255m 0.018 ms 0.04% loop_invariant_code_motion[0m[38;2;000;255;255m [6 x 2.940 us]
+[0m [38;2;255;000;255m 4.671 ms 10.67% simplify [0m[38;2;000;255;255m [6 x 778.516 us]
+[0m [38;2;255;000;255m 1.221 ms 26.13% replace_all_usages_with[0m[38;2;000;255;255m [434 x 2.813 us]
+[0m [38;2;255;000;255m 2.472 ms 52.92% type_check [0m[38;2;000;255;255m [184 x 13.434 us]
+[0m [38;2;255;000;255m 0.978 ms 20.95% [unaccounted]
+[0m [38;2;255;000;255m 24.900 ms 56.86% whole_kernel_cse [0m[38;2;000;255;255m [6 x 4.150 ms]
+[0m [38;2;255;000;255m 4.452 ms 17.88% replace_all_usages_with[0m[38;2;000;255;255m [1206 x 3.692 us]
+[0m [38;2;255;000;255m 20.448 ms 82.12% [unaccounted]
+[0m [38;2;255;000;255m 2.859 ms 6.53% cfg_optimization [0m[38;2;000;255;255m [4 x 714.779 us]
+[0m [38;2;255;000;255m 1.027 ms 35.92% store_to_load_forwarding[0m[38;2;000;255;255m [5 x 205.374 us]
+[0m [38;2;255;000;255m 0.290 ms 28.26% reaching_definition_analysis[0m[38;2;000;255;255m [5 x 58.031 us]
+[0m [38;2;255;000;255m 0.165 ms 16.02% replace_all_usages_with[0m[38;2;000;255;255m [60 x 2.742 us]
+[0m [38;2;255;000;255m 0.572 ms 55.72% [unaccounted]
+[0m [38;2;255;000;255m 1.500 ms 52.46% dead_store_elimination[0m[38;2;000;255;255m [5 x 299.978 us]
+[0m [38;2;255;000;255m 0.734 ms 48.96% live_variable_analysis[0m[38;2;000;255;255m [5 x 146.866 us]
+[0m [38;2;255;000;255m 0.766 ms 51.04% [unaccounted]
+[0m [38;2;255;000;255m 0.300 ms 10.50% die [0m[38;2;000;255;255m [4 x 75.042 us]
+[0m [38;2;255;000;255m 0.032 ms 1.13% [unaccounted]
+[0m [38;2;173;216;230m 0.136 ms 0.28% optimize_bit_struct_stores[0m[38;2;000;255;255m [2 x 67.949 us]
+[0m [38;2;255;000;255m 81.301 us 59.82% die [0m[38;2;000;255;255m [2 x 40.650 us]
+[0m [38;2;255;000;255m 54.598 us 40.18% [unaccounted]
+[0m [38;2;173;216;230m 0.007 ms 0.01% replace_all_usages_with[0m[38;2;000;255;255m [6 x 1.192 us]
+[0m [38;2;173;216;230m 0.554 ms 1.14% [unaccounted]
+[0m [38;2;144;238;144m 72.283 ms 24.87% codegen [0m[38;2;000;255;255m [2 x 36.142 ms]
+[0m [38;2;255;255;000m 9.329 ms 12.91% clone_struct_module [0m[38;2;000;255;255m [2 x 4.665 ms]
+[0m [38;2;255;255;000m 0.000 ms 0.00% CodeGenLLVMCPU [0m[38;2;000;255;255m [2 x 0.000 ns]
+[0m [38;2;255;255;000m 0.478 ms 0.66% emit_to_module [0m[38;2;000;255;255m [2 x 239.015 us]
+[0m [38;2;255;255;000m 62.460 ms 86.41% compile_module_to_executable[0m[38;2;000;255;255m [2 x 31.230 ms]
+[0m [38;2;173;216;230m 1.526 ms 2.44% eliminate_unused_functions[0m[38;2;000;255;255m [2 x 762.939 us]
+[0m [38;2;173;216;230m 51.956 ms 83.18% global_optimize_module_cpu[0m[38;2;000;255;255m [2 x 25.978 ms]
+[0m [38;2;255;000;255m 2.474 ms 4.76% [Taichi] version 0.9.0, llvm 10.0.0, commit dd7654ae, linux, python 3.8.12
+[Taichi] Starting on arch=x64
+llvm_function_pass [0m[38;2;000;255;255m [2 x 1.237 ms]
+[0m [38;2;255;000;255m 46.998 ms 90.46% llvm_module_pass [0m[38;2;000;255;255m [2 x 23.499 ms]
+[0m [38;2;255;000;255m 2.484 ms 4.78% [unaccounted]
+[0m [38;2;173;216;230m 8.978 ms 14.37% [unaccounted]
+[0m[38;2;000;255;255m>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+[0m
\ No newline at end of file
diff --git a/setup.py b/setup.py
index df883ba3b..484bb2bc0 100644
--- a/setup.py
+++ b/setup.py
@@ -32,6 +32,7 @@
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
]
project_name = os.getenv('PROJECT_NAME', 'taichi')
@@ -261,7 +262,7 @@ def run(self):
author='Taichi developers',
author_email='yuanmhu@gmail.com',
url='https://github.com/taichi-dev/taichi',
- python_requires=">=3.6,<3.10",
+ python_requires=">=3.6,<3.11",
install_requires=[
'numpy', 'sourceinspect>=0.0.4', 'colorama', 'astor',
'astunparse;python_version<"3.9"'
diff --git a/test_temp/jit/jit_pyinstrument.py b/test_temp/jit/jit_pyinstrument.py
new file mode 100644
index 000000000..bde6706c7
--- /dev/null
+++ b/test_temp/jit/jit_pyinstrument.py
@@ -0,0 +1,22 @@
+import time
+import taichi as ti
+from pyinstrument import Profiler
+from pyinstrument.renderers import ConsoleRenderer
+
+ti.init(arch=ti.cpu)
+a = ti.ndarray(float, 2048*2048)
+
+@ti.kernel
+def fill_half(a: ti.any_arr()):
+ for I in a:
+ a[I] = 0.5
+
+fill_half(a)
+profiler = Profiler()
+profiler.start()
+fill_half(a)
+session = profiler.stop()
+profile_renderer = ConsoleRenderer(unicode=True, color=True, show_all=True)
+print(profile_renderer.render(session))
+
+
diff --git a/test_temp/jit/jit_section.py b/test_temp/jit/jit_section.py
new file mode 100644
index 000000000..26613dd89
--- /dev/null
+++ b/test_temp/jit/jit_section.py
@@ -0,0 +1,21 @@
+import time
+import taichi as ti
+#from pyinstrument import Profiler
+#from pyinstrument.renderers import ConsoleRenderer
+
+from line_profiler import LineProfiler
+
+ti.init(arch=ti.cpu)
+a = ti.ndarray(float, 2048*2048)
+
+@ti.kernel
+def fill_half(a: ti.any_arr()):
+ for I in a:
+ a[I] = 0.5
+
+lprofiler = LineProfiler()
+lprofiler.add_function(ti.lang.kernel_impl.Kernel.func__)
+lp_wrapper = lprofiler(fill_half(a))
+lp_wrapper()
+
+lprofiler.print_stats()
diff --git a/test_temp/jit/jit_test.py b/test_temp/jit/jit_test.py
new file mode 100644
index 000000000..8dc0663f6
--- /dev/null
+++ b/test_temp/jit/jit_test.py
@@ -0,0 +1,28 @@
+import time
+import taichi as ti
+
+print('end2end time')
+
+t_start = time.perf_counter()
+ti.init(arch=ti.cpu)
+t_used = time.perf_counter() - t_start
+print('ti.init time:', "{:.8f}".format(t_used), "s")
+
+a = ti.ndarray(float, 2048*2048)
+
+@ti.kernel
+def fill_half(a: ti.any_arr()):
+ for I in a:
+ a[I] = 0.5
+
+t_start = time.perf_counter()
+fill_half(a)
+t_used = time.perf_counter() - t_start
+print('first execution time:', "{:.8f}".format(t_used), "s")
+
+
+t_start = time.perf_counter()
+fill_half(a)
+t_used = time.perf_counter() - t_start
+print('second execution time:', "{:.8f}".format(t_used), "s")
+
diff --git a/test_temp/jit/mpm88/mpm88_compute.py b/test_temp/jit/mpm88/mpm88_compute.py
new file mode 100644
index 000000000..93bb6680a
--- /dev/null
+++ b/test_temp/jit/mpm88/mpm88_compute.py
@@ -0,0 +1,92 @@
+# MPM-MLS in 88 lines of Taichi code, originally created by @yuanming-hu
+import taichi as ti
+
+ti.init(arch=ti.cpu)
+
+n_particles = 8192
+n_grid = 128
+dx = 1 / n_grid
+dt = 2e-4
+
+p_rho = 1
+p_vol = (dx * 0.5)**2
+p_mass = p_vol * p_rho
+gravity = 9.8
+bound = 3
+E = 400
+
+x = ti.Vector.field(2, float, n_particles)
+v = ti.Vector.field(2, float, n_particles)
+C = ti.Matrix.field(2, 2, float, n_particles)
+J = ti.field(float, n_particles)
+
+grid_v = ti.Vector.field(2, float, (n_grid, n_grid))
+grid_m = ti.field(float, (n_grid, n_grid))
+
+
+@ti.kernel
+def substep():
+ for i, j in grid_m:
+ grid_v[i, j] = [0, 0]
+ grid_m[i, j] = 0
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * 4 * E * p_vol * (J[p] - 1) / dx**2
+ affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset - fx) * dx
+ weight = w[i].x * w[j].y
+ grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
+ grid_m[base + offset] += weight * p_mass
+ for i, j in grid_m:
+ if grid_m[i, j] > 0:
+ grid_v[i, j] /= grid_m[i, j]
+ grid_v[i, j].y -= dt * gravity
+ if i < bound and grid_v[i, j].x < 0:
+ grid_v[i, j].x = 0
+ if i > n_grid - bound and grid_v[i, j].x > 0:
+ grid_v[i, j].x = 0
+ if j < bound and grid_v[i, j].y < 0:
+ grid_v[i, j].y = 0
+ if j > n_grid - bound and grid_v[i, j].y > 0:
+ grid_v[i, j].y = 0
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ new_v = ti.Vector.zero(float, 2)
+ new_C = ti.Matrix.zero(float, 2, 2)
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset - fx) * dx
+ weight = w[i].x * w[j].y
+ g_v = grid_v[base + offset]
+ new_v += weight * g_v
+ new_C += 4 * weight * g_v.outer_product(dpos) / dx**2
+ v[p] = new_v
+ x[p] += dt * v[p]
+ J[p] *= 1 + dt * new_C.trace()
+ C[p] = new_C
+
+
+@ti.kernel
+def init():
+ for i in range(n_particles):
+ x[i] = [ti.random() * 0.4 + 0.2, ti.random() * 0.4 + 0.2]
+ v[i] = [0, -1]
+ J[i] = 1
+
+
+ti.clear_profile_info()
+init()
+ti.print_profile_info()
+
+substep()
+
+
+substep()
diff --git a/test_temp/jit/mpm88/mpm88_compute_time.py b/test_temp/jit/mpm88/mpm88_compute_time.py
new file mode 100644
index 000000000..833c40194
--- /dev/null
+++ b/test_temp/jit/mpm88/mpm88_compute_time.py
@@ -0,0 +1,98 @@
+# MPM-MLS in 88 lines of Taichi code, originally created by @yuanming-hu
+import time
+import taichi as ti
+
+print ("start ti.init")
+ti.init(arch=ti.cpu, log_level=ti.TRACE)
+print ("finish ti.init")
+
+n_particles = 8192
+n_grid = 128
+dx = 1 / n_grid
+dt = 2e-4
+
+p_rho = 1
+p_vol = (dx * 0.5)**2
+p_mass = p_vol * p_rho
+gravity = 9.8
+bound = 3
+E = 400
+
+x = ti.Vector.field(2, float, n_particles)
+v = ti.Vector.field(2, float, n_particles)
+C = ti.Matrix.field(2, 2, float, n_particles)
+J = ti.field(float, n_particles)
+
+grid_v = ti.Vector.field(2, float, (n_grid, n_grid))
+grid_m = ti.field(float, (n_grid, n_grid))
+
+
+@ti.kernel
+def substep():
+ for i, j in grid_m:
+ grid_v[i, j] = [0, 0]
+ grid_m[i, j] = 0
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * 4 * E * p_vol * (J[p] - 1) / dx**2
+ affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset - fx) * dx
+ weight = w[i].x * w[j].y
+ grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
+ grid_m[base + offset] += weight * p_mass
+ for i, j in grid_m:
+ if grid_m[i, j] > 0:
+ grid_v[i, j] /= grid_m[i, j]
+ grid_v[i, j].y -= dt * gravity
+ if i < bound and grid_v[i, j].x < 0:
+ grid_v[i, j].x = 0
+ if i > n_grid - bound and grid_v[i, j].x > 0:
+ grid_v[i, j].x = 0
+ if j < bound and grid_v[i, j].y < 0:
+ grid_v[i, j].y = 0
+ if j > n_grid - bound and grid_v[i, j].y > 0:
+ grid_v[i, j].y = 0
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ new_v = ti.Vector.zero(float, 2)
+ new_C = ti.Matrix.zero(float, 2, 2)
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset - fx) * dx
+ weight = w[i].x * w[j].y
+ g_v = grid_v[base + offset]
+ new_v += weight * g_v
+ new_C += 4 * weight * g_v.outer_product(dpos) / dx**2
+ v[p] = new_v
+ x[p] += dt * v[p]
+ J[p] *= 1 + dt * new_C.trace()
+ C[p] = new_C
+
+
+@ti.kernel
+def init():
+ for i in range(n_particles):
+ x[i] = [ti.random() * 0.4 + 0.2, ti.random() * 0.4 + 0.2]
+ v[i] = [0, -1]
+ J[i] = 1
+
+
+print ("start init")
+init()
+print ("finish init")
+
+print ("start substep")
+substep()
+print ("finish substep")
+
+print ("start substep")
+substep()
+print ("finish substep")
diff --git a/test_temp/jit/mpm88/trace_with_details.txt b/test_temp/jit/mpm88/trace_with_details.txt
new file mode 100644
index 000000000..0f4785e89
--- /dev/null
+++ b/test_temp/jit/mpm88/trace_with_details.txt
@@ -0,0 +1,111 @@
+[T 02/22/22 13:49:12.586 14398] [program.cpp:Program@51] Program initializing...
+[T 02/22/22 13:49:12.586 14398] [snode_tree_buffer_manager.cpp:SNodeTreeBufferManager@11] SNode tree buffer manager created.
+[T 02/22/22 13:49:12.586 14398] [llvm_context.cpp:TaichiLLVMContext@63] Creating Taichi llvm context for arch: x64
+[T 02/22/22 13:49:12.586 14398] [llvm_context.cpp:get_this_thread_data@649] Creating thread local data for thread 140442101912512
+[T 02/22/22 13:49:12.586 14398] [llvm_context.cpp:TaichiLLVMContext@97] Taichi llvm context created.
+[T 02/22/22 13:49:12.586 14398] [memory_pool.cpp:MemoryPool@13] Memory pool created. Default buffer size per allocator = 1024 MB
+[T 02/22/22 13:49:12.681 14398] [program.cpp:Program@159] Program (0x55f9c8bb44d0) arch=x64 initialized.
+[T 02/22/22 13:49:12.681 14398] [misc.py:init@347] Materializing runtime...
+[T 02/22/22 13:49:12.681 14398] [unified_allocator.cpp:UnifiedAllocator@45] Memory allocated. Allocation time = 4.05e-06 s
+[T 02/22/22 13:49:12.681 14398] [unified_allocator.h:allocate@39] UM [data=140436605714432] allocate() request=256 remain=1073741824
+[T 02/22/22 13:49:12.681 14398] [llvm_program.cpp:materialize_runtime@394] Allocating 16 random states (used by CUDA only)
+[T 02/22/22 13:49:12.691 14398] [unified_allocator.h:allocate@39] UM [data=140436605714432] allocate() request=35256 remain=1073741568
+[T 02/22/22 13:49:12.691 14398] [unified_allocator.h:allocate@39] UM [data=140436605714432] allocate() request=2097160 remain=1073706312
+[T 02/22/22 13:49:12.691 14398] [unified_allocator.h:allocate@39] UM [data=140436605714432] allocate() request=1048576 remain=1071607800
+[T 02/22/22 13:49:12.691 14398] [unified_allocator.h:allocate@39] UM [data=140436605714432] allocate() request=320 remain=1070555136
+[T 02/22/22 13:49:12.691 14398] [llvm_program.cpp:materialize_runtime@403] LLVMRuntime initialized (excluding `root`)
+[T 02/22/22 13:49:12.691 14398] [llvm_program.cpp:materialize_runtime@406] LLVMRuntime pointer fetched
+[T 02/22/22 13:49:12.691 14398] [hacked_signal_handler.cpp:HackedSignalRegister@62] Taichi signal handlers registered. Thread ID = 14398
+[T 02/22/22 13:49:12.702 14398] [llvm_program.cpp:initialize_llvm_runtime_snodes@183] Allocating data structure of size 491520 bytes
+[T 02/22/22 13:49:12.702 14398] [snode_tree_buffer_manager.cpp:allocate@44] allocating memory for SNode Tree 0
+[T 02/22/22 13:49:12.702 14398] [unified_allocator.h:allocate@39] UM [data=140436605714432] allocate() request=491520 remain=1070554816
+[T 02/22/22 13:49:12.702 14398] [kernel_impl.py:materialize@441] Compiling kernel init_c58_0...
+[T 02/22/22 13:49:12.703 14398] [kernel_impl.py:materialize@476] [ JIT] creating kernel
+[T 02/22/22 13:49:12.703 14398] [program.h:kernel@186] [ JIT] make unique Kernel
+[T 02/22/22 13:49:12.703 14398] [kernel.cpp:Kernel@35] [ JIT] Kernel ctor 2
+[T 02/22/22 13:49:12.703 14398] [kernel.cpp:init@394] [ JIT] Kernel init
+[T 02/22/22 13:49:12.703 14398] [kernel.cpp:init@418] [ JIT] pos2
+[T 02/22/22 13:49:12.704 14398] [kernel.cpp:init@428] [ JIT] call compile()
+[T 02/22/22 13:49:12.704 14398] [kernel_impl.py:materialize@480] [ JIT] materialize rest] 0.0016105850008898415
+[T 02/22/22 13:49:12.704 14398] [kernel_impl.py:ensure_compiled@661] [ JIT] materialize()] 0.01229692600099952
+[T 02/22/22 13:49:12.704 14398] [kernel_impl.py:__call__@677] [ JIT]------------]--------
+[T 02/22/22 13:49:12.704 14398] [kernel_impl.py:func__@622] [ JIT] t_kernel(launch_ctx)
+[T 02/22/22 13:49:12.704 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:12.704 14398] [llvm_program.cpp:compile@134] [ JIT] LlvmProgramImpl::compile
+[T 02/22/22 13:49:12.706 14398] [llvm_program.cpp:compile@137] [ JIT] kernel lower
+[T 02/22/22 13:49:12.706 14398] [llvm_program.cpp:compile@140] [ JIT] kernel return codegen
+[T 02/22/22 13:49:12.730 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel init_c58_0_kernel
+[T 02/22/22 13:49:12.730 14398] [kernel_impl.py:__call__@680] [ JIT]------------] 0.02549659599935694
+[T 02/22/22 13:49:12.730 14398] [kernel_impl.py:materialize@441] Compiling kernel substep_c56_0...
+[T 02/22/22 13:49:12.732 14398] [kernel_impl.py:materialize@476] [ JIT] creating kernel
+[T 02/22/22 13:49:12.732 14398] [program.h:kernel@186] [ JIT] make unique Kernel
+[T 02/22/22 13:49:12.732 14398] [kernel.cpp:Kernel@35] [ JIT] Kernel ctor 2
+[T 02/22/22 13:49:12.732 14398] [kernel.cpp:init@394] [ JIT] Kernel init
+[T 02/22/22 13:49:12.732 14398] [kernel.cpp:init@418] [ JIT] pos2
+[T 02/22/22 13:49:12.808 14398] [kernel.cpp:init@428] [ JIT] call compile()
+[T 02/22/22 13:49:12.808 14398] [kernel_impl.py:materialize@480] [ JIT] materialize rest] 0.07633498200084432
+[T 02/22/22 13:49:12.809 14398] [kernel_impl.py:ensure_compiled@661] [ JIT] materialize()] 0.07872364200011361
+[T 02/22/22 13:49:12.809 14398] [kernel_impl.py:__call__@677] [ JIT]------------]--------
+[T 02/22/22 13:49:12.809 14398] [kernel_impl.py:func__@622] [ JIT] t_kernel(launch_ctx)
+[T 02/22/22 13:49:12.809 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:12.809 14398] [llvm_program.cpp:compile@134] [ JIT] LlvmProgramImpl::compile
+[T 02/22/22 13:49:12.828 14398] [kernel.cpp:Kernel@27] [ JIT] Kernel ctor 1
+[T 02/22/22 13:49:12.828 14398] [kernel.cpp:init@394] [ JIT] Kernel init
+[T 02/22/22 13:49:12.828 14398] [kernel.cpp:init@418] [ JIT] pos2
+[T 02/22/22 13:49:12.828 14398] [kernel.cpp:init@428] [ JIT] call compile()
+[T 02/22/22 13:49:12.828 14398] [constant_fold.cpp:get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=12397727920192291077
+[T 02/22/22 13:49:12.828 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:12.828 14398] [llvm_program.cpp:compile@134] [ JIT] LlvmProgramImpl::compile
+[T 02/22/22 13:49:12.828 14398] [llvm_program.cpp:compile@137] [ JIT] kernel lower
+[T 02/22/22 13:49:12.828 14398] [llvm_program.cpp:compile@140] [ JIT] kernel return codegen
+[T 02/22/22 13:49:12.838 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_0_kernel
+[T 02/22/22 13:49:12.838 14398] [kernel.cpp:Kernel@27] [ JIT] Kernel ctor 1
+[T 02/22/22 13:49:12.838 14398] [kernel.cpp:init@394] [ JIT] Kernel init
+[T 02/22/22 13:49:12.838 14398] [kernel.cpp:init@418] [ JIT] pos2
+[T 02/22/22 13:49:12.838 14398] [kernel.cpp:init@428] [ JIT] call compile()
+[T 02/22/22 13:49:12.838 14398] [constant_fold.cpp:get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=12397727918246134016
+[T 02/22/22 13:49:12.838 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:12.838 14398] [llvm_program.cpp:compile@134] [ JIT] LlvmProgramImpl::compile
+[T 02/22/22 13:49:12.838 14398] [llvm_program.cpp:compile@137] [ JIT] kernel lower
+[T 02/22/22 13:49:12.838 14398] [llvm_program.cpp:compile@140] [ JIT] kernel return codegen
+[T 02/22/22 13:49:12.847 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_1_kernel
+[T 02/22/22 13:49:13.014 14398] [kernel.cpp:Kernel@27] [ JIT] Kernel ctor 1
+[T 02/22/22 13:49:13.014 14398] [kernel.cpp:init@394] [ JIT] Kernel init
+[T 02/22/22 13:49:13.014 14398] [kernel.cpp:init@418] [ JIT] pos2
+[T 02/22/22 13:49:13.014 14398] [kernel.cpp:init@428] [ JIT] call compile()
+[T 02/22/22 13:49:13.014 14398] [constant_fold.cpp:get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=12397727920259663113
+[T 02/22/22 13:49:13.014 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.014 14398] [llvm_program.cpp:compile@134] [ JIT] LlvmProgramImpl::compile
+[T 02/22/22 13:49:13.015 14398] [llvm_program.cpp:compile@137] [ JIT] kernel lower
+[T 02/22/22 13:49:13.015 14398] [llvm_program.cpp:compile@140] [ JIT] kernel return codegen
+[T 02/22/22 13:49:13.023 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 13:49:13.023 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.023 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 13:49:13.023 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.023 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 13:49:13.023 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.023 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 13:49:13.023 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.023 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 13:49:13.023 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.023 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 13:49:13.026 14398] [llvm_program.cpp:compile@137] [ JIT] kernel lower
+[T 02/22/22 13:49:13.026 14398] [llvm_program.cpp:compile@140] [ JIT] kernel return codegen
+[T 02/22/22 13:49:13.074 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel substep_c56_0_kernel
+[T 02/22/22 13:49:13.074 14398] [kernel_impl.py:__call__@680] [ JIT]------------] 0.2657265729994833
+[T 02/22/22 13:49:13.074 14398] [kernel_impl.py:materialize@434] [ JIT] compiled, returned
+[T 02/22/22 13:49:13.075 14398] [kernel_impl.py:ensure_compiled@661] [ JIT] materialize()] 4.139199882047251e-05
+[T 02/22/22 13:49:13.075 14398] [kernel_impl.py:__call__@677] [ JIT]------------]--------
+[T 02/22/22 13:49:13.075 14398] [kernel_impl.py:func__@622] [ JIT] t_kernel(launch_ctx)
+[T 02/22/22 13:49:13.075 14398] [kernel.cpp:operator()@109] [ JIT] Kernel::operator()
+[T 02/22/22 13:49:13.075 14398] [codegen_llvm.cpp:operator()@2246] Launching kernel substep_c56_0_kernel
+[T 02/22/22 13:49:13.075 14398] [kernel_impl.py:__call__@680] [ JIT]------------] 0.0006230629987840075
+[Taichi] version 0.9.0, llvm 10.0.0, commit dd7654ae, linux, python 3.8.12
+[Taichi] Starting on arch=x64
+ti init time: 0.10585920 s
+init() time: 0.03791171 s
+>>>>>>> 1 substep() time: 0.34459417 s
+2 substep() time: 0.00072633 s
+[T 02/22/22 13:49:13.105 14398] [program.cpp:finalize@467] Program finalizing...
+[T 02/22/22 13:49:13.106 14398] [program.cpp:finalize@515] Program (0x55f9c8bb44d0) finalized_.
+[T 02/22/22 13:49:13.110 14398] [hacked_signal_handler.cpp:~HackedSignalRegister@81] Taichi signal handlers unregistered. Thread ID = 14398
diff --git a/test_temp/jit/mpm88/trace_with_step_time.txt b/test_temp/jit/mpm88/trace_with_step_time.txt
new file mode 100644
index 000000000..1abe6abca
--- /dev/null
+++ b/test_temp/jit/mpm88/trace_with_step_time.txt
@@ -0,0 +1,46 @@
+[T 02/22/22 10:43:31.687 4690] [program.cpp:Program@51] Program initializing...
+[T 02/22/22 10:43:31.687 4690] [snode_tree_buffer_manager.cpp:SNodeTreeBufferManager@11] SNode tree buffer manager created.
+[T 02/22/22 10:43:31.687 4690] [llvm_context.cpp:TaichiLLVMContext@63] Creating Taichi llvm context for arch: x64
+[T 02/22/22 10:43:31.687 4690] [llvm_context.cpp:get_this_thread_data@649] Creating thread local data for thread 140328470250432
+[T 02/22/22 10:43:31.687 4690] [llvm_context.cpp:TaichiLLVMContext@97] Taichi llvm context created.
+[T 02/22/22 10:43:31.687 4690] [memory_pool.cpp:MemoryPool@13] Memory pool created. Default buffer size per allocator = 1024 MB
+[T 02/22/22 10:43:31.782 4690] [program.cpp:Program@159] Program (0x55bbab715710) arch=x64 initialized.
+[T 02/22/22 10:43:31.782 4690] [misc.py:init@347] Materializing runtime...
+[T 02/22/22 10:43:31.782 4690] [unified_allocator.cpp:UnifiedAllocator@45] Memory allocated. Allocation time = 3.81e-06 s
+[T 02/22/22 10:43:31.782 4690] [unified_allocator.h:allocate@39] UM [data=140322973622272] allocate() request=256 remain=1073741824
+[T 02/22/22 10:43:31.782 4690] [llvm_program.cpp:materialize_runtime@391] Allocating 16 random states (used by CUDA only)
+[T 02/22/22 10:43:31.792 4690] [unified_allocator.h:allocate@39] UM [data=140322973622272] allocate() request=35256 remain=1073741568
+[T 02/22/22 10:43:31.792 4690] [unified_allocator.h:allocate@39] UM [data=140322973622272] allocate() request=2097160 remain=1073706312
+[T 02/22/22 10:43:31.792 4690] [unified_allocator.h:allocate@39] UM [data=140322973622272] allocate() request=1048576 remain=1071607800
+[T 02/22/22 10:43:31.792 4690] [unified_allocator.h:allocate@39] UM [data=140322973622272] allocate() request=320 remain=1070555136
+[T 02/22/22 10:43:31.792 4690] [llvm_program.cpp:materialize_runtime@400] LLVMRuntime initialized (excluding `root`)
+[T 02/22/22 10:43:31.792 4690] [llvm_program.cpp:materialize_runtime@403] LLVMRuntime pointer fetched
+[T 02/22/22 10:43:31.792 4690] [hacked_signal_handler.cpp:HackedSignalRegister@62] Taichi signal handlers registered. Thread ID = 4690
+[T 02/22/22 10:43:31.802 4690] [llvm_program.cpp:initialize_llvm_runtime_snodes@180] Allocating data structure of size 491520 bytes
+[T 02/22/22 10:43:31.802 4690] [snode_tree_buffer_manager.cpp:allocate@44] allocating memory for SNode Tree 0
+[T 02/22/22 10:43:31.802 4690] [unified_allocator.h:allocate@39] UM [data=140322973622272] allocate() request=491520 remain=1070554816
+[T 02/22/22 10:43:31.802 4690] [kernel_impl.py:materialize@437] Compiling kernel init_c58_0...
+[T 02/22/22 10:43:31.829 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel init_c58_0_kernel
+[T 02/22/22 10:43:31.829 4690] [kernel_impl.py:materialize@437] Compiling kernel substep_c56_0...
+[T 02/22/22 10:43:31.928 4690] [constant_fold.cpp:get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=16201203073215955205
+[T 02/22/22 10:43:31.938 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_0_kernel
+[T 02/22/22 10:43:31.938 4690] [constant_fold.cpp:get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=16201203071269798144
+[T 02/22/22 10:43:31.948 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_1_kernel
+[T 02/22/22 10:43:32.111 4690] [constant_fold.cpp:get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=16201203073283327241
+[T 02/22/22 10:43:32.120 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 10:43:32.120 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 10:43:32.120 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 10:43:32.120 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 10:43:32.120 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 10:43:32.120 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel jit_evaluator_2_kernel
+[T 02/22/22 10:43:32.171 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel substep_c56_0_kernel
+[T 02/22/22 10:43:32.172 4690] [codegen_llvm.cpp:operator()@2246] Launching kernel substep_c56_0_kernel
+[Taichi] version 0.9.0, llvm 10.0.0, commit dd7654ae, linux, python 3.8.12
+[Taichi] Starting on arch=x64
+ti init time: 0.10571943 s
+init() time: 0.03646100 s
+>>>>>>> 1 substep() time: 0.34291046 s
+2 substep() time: 0.00053424 s
+[T 02/22/22 10:43:32.202 4690] [program.cpp:finalize@467] Program finalizing...
+[T 02/22/22 10:43:32.203 4690] [program.cpp:finalize@515] Program (0x55bbab715710) finalized_.
+[T 02/22/22 10:43:32.207 4690] [hacked_signal_handler.cpp:~HackedSignalRegister@81] Taichi signal handlers unregistered. Thread ID = 4690
diff --git a/test_temp/jit/r000hs/.norun b/test_temp/jit/r000hs/.norun
new file mode 100644
index 000000000..e69de29bb
diff --git a/test_temp/jit/r000hs/archive/binaries/[vdso]/0e58e4148bc0ce4e23cb901a87f22850/[vdso] b/test_temp/jit/r000hs/archive/binaries/[vdso]/0e58e4148bc0ce4e23cb901a87f22850/[vdso]
new file mode 100644
index 000000000..4b5b25278
Binary files /dev/null and b/test_temp/jit/r000hs/archive/binaries/[vdso]/0e58e4148bc0ce4e23cb901a87f22850/[vdso] differ
diff --git a/test_temp/jit/r000hs/archive/binaries/[vdso]/8de1bac5042ec43d09faecb1f06f864b/[vdso] b/test_temp/jit/r000hs/archive/binaries/[vdso]/8de1bac5042ec43d09faecb1f06f864b/[vdso]
new file mode 100644
index 000000000..11d8fdb44
Binary files /dev/null and b/test_temp/jit/r000hs/archive/binaries/[vdso]/8de1bac5042ec43d09faecb1f06f864b/[vdso] differ
diff --git a/test_temp/jit/r000hs/config/analysis_type.cfg b/test_temp/jit/r000hs/config/analysis_type.cfg
new file mode 100644
index 000000000..b46ea5487
--- /dev/null
+++ b/test_temp/jit/r000hs/config/analysis_type.cfg
@@ -0,0 +1,249 @@
+
+
+
+ %HotspotsAtypeName
+ %HotspotsAtypeShortName
+ %HotspotsAtypeDescription
+ %HotspotsAtypeShortDescription
+ hs
+ hotspots
+ 1
+ 1
+ fire solid
+ configs.analysis_type-hotspots_f1101
+
+
+
+
+
+
+ %HotspotsByCPUUsageViewpointName
+
+
+ %HotspotsByCPUUsageViewpointName
+
+
+
+
+
+ config://analysis_type/include/knobs.xsl?
+
+
+ &
+
+
+
+
+
+
+ %SamplingModeDescription
+
+ hw
+ hw
+
+
+
+
+
+ false
+
+ %EnableStackCollectionDescription
+ false
+
+
+
+
+ hotspotsGroup
+
+
+ %SamplingModeDescription
+
+ sw
+ hw
+ sw
+
+
+
+
+
+ false
+
+ %EnableStackCollectionDescription
+ false
+
+
+
+
+
+
+
+ %SlowGoodFrameThresholdDescription
+ 0.01
+ 1024000
+ 40
+
+
+ %GoodFastFrameThresholdDescription
+ 0.01
+ 1024000
+ 100
+
+
+
+ true
+ %EnableCharacterizationInsightsDescription
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ stack
+ 10
+ true
+
+
+
+ java,dotnet,python
+
+
+ java,python
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,
+
+
+
+
+
+ ,
+
+
+
+
+
+ ,
+
+
+
+
+
+
+
+
+
+
+ true
+ true
+
+
+ true
+
+
+
+
+
+ true
+
+
+ runsa
+
+
+
+
+
+
+ true
+ all
+
+
+
+
+ false
+ true
+
+
+
+
+
+
+ ,
+
+
+
+
+
+ ,
+
+
+
+
+
+
+ ,
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ true
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/config/collection.cfg b/test_temp/jit/r000hs/config/collection.cfg
new file mode 100644
index 000000000..6292189c4
--- /dev/null
+++ b/test_temp/jit/r000hs/config/collection.cfg
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/config/context_values.cfg b/test_temp/jit/r000hs/config/context_values.cfg
new file mode 100644
index 000000000..167a3c97e
--- /dev/null
+++ b/test_temp/jit/r000hs/config/context_values.cfg
@@ -0,0 +1,358 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/config/log.cfg b/test_temp/jit/r000hs/config/log.cfg
new file mode 100644
index 000000000..f4622dd03
--- /dev/null
+++ b/test_temp/jit/r000hs/config/log.cfg
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/config/runss.options b/test_temp/jit/r000hs/config/runss.options
new file mode 100644
index 000000000..79d5bbbb2
--- /dev/null
+++ b/test_temp/jit/r000hs/config/runss.options
@@ -0,0 +1,18 @@
+-r
+/home/qiao/Taichi/taichi-qiao/test_temp/jit/r000hs
+--stack-stitching
+--data-limit-mb=1000
+--disk-space-limit=0
+--mrte-type=java,python
+--stack-unwind-limit=8388608
+--itt-config=frame
+--itt-config=task,event,counter
+--stackwalk=offline
+--mrte-mode=auto
+--type=cpu:counters:nostack
+--type=cpu:stack
+--interval=10
+--
+python
+mpm88_compute.py
+1000
diff --git a/test_temp/jit/r000hs/config/search_dir.cfg b/test_temp/jit/r000hs/config/search_dir.cfg
new file mode 100644
index 000000000..eaa5edc64
--- /dev/null
+++ b/test_temp/jit/r000hs/config/search_dir.cfg
@@ -0,0 +1,19 @@
+
+
+
+
+
+ 1
+
+
+ 2
+
+
+ 3
+
+
+ 4
+
+
+
+
diff --git a/test_temp/jit/r000hs/config/state.cfg b/test_temp/jit/r000hs/config/state.cfg
new file mode 100644
index 000000000..18a046704
--- /dev/null
+++ b/test_temp/jit/r000hs/config/state.cfg
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601702.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601702.0.th
new file mode 100644
index 000000000..365334e57
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601702.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601716.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601716.0.th
new file mode 100644
index 000000000..fd23c1975
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601716.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601746.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601746.0.th
new file mode 100644
index 000000000..56f8f8f2b
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601746.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601747.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601747.0.th
new file mode 100644
index 000000000..897c471ce
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601747.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601748.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601748.0.th
new file mode 100644
index 000000000..54f49fa56
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601748.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601749.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601749.0.th
new file mode 100644
index 000000000..7eb622d99
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601749.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601750.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601750.0.th
new file mode 100644
index 000000000..27b25cfea
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601750.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601751.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601751.0.th
new file mode 100644
index 000000000..0d1177804
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601751.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601752.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601752.0.th
new file mode 100644
index 000000000..d5e3b31cb
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601752.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601753.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601753.0.th
new file mode 100644
index 000000000..63bda20d4
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601753.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601754.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601754.0.th
new file mode 100644
index 000000000..836f7a9ed
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601754.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601755.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601755.0.th
new file mode 100644
index 000000000..7ea535d9b
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601755.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601756.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601756.0.th
new file mode 100644
index 000000000..8204f860e
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601756.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601757.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601757.0.th
new file mode 100644
index 000000000..bd38992d2
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601757.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601758.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601758.0.th
new file mode 100644
index 000000000..093ec0ef0
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601758.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601759.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601759.0.th
new file mode 100644
index 000000000..565cddbf3
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601759.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601760.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601760.0.th
new file mode 100644
index 000000000..98ec1c65c
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601760.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601761.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601761.0.th
new file mode 100644
index 000000000..229ee182a
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601761.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601762.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601762.0.th
new file mode 100644
index 000000000..1be108368
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601762.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601763.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601763.0.th
new file mode 100644
index 000000000..cec933212
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601763.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601764.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601764.0.th
new file mode 100644
index 000000000..31a59bc3d
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601764.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601765.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601765.0.th
new file mode 100644
index 000000000..7df872afc
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601765.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601766.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601766.0.th
new file mode 100644
index 000000000..cb2af8235
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601766.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601767.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601767.0.th
new file mode 100644
index 000000000..9895c53b3
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601767.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601768.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601768.0.th
new file mode 100644
index 000000000..0f63ffc3b
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601768.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601769.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601769.0.th
new file mode 100644
index 000000000..2a6b53812
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601769.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601770.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601770.0.th
new file mode 100644
index 000000000..9e98be026
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601770.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601771.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601771.0.th
new file mode 100644
index 000000000..404d9d5b7
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601771.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601772.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601772.0.th
new file mode 100644
index 000000000..b2334f40f
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601772.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601773.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601773.0.th
new file mode 100644
index 000000000..e425db0ee
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601773.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601774.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601774.0.th
new file mode 100644
index 000000000..98932205e
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601774.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601775.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601775.0.th
new file mode 100644
index 000000000..16c06d63d
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601775.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601776.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601776.0.th
new file mode 100644
index 000000000..e605012d7
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601776.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601777.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601777.0.th
new file mode 100644
index 000000000..dec578e11
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601777.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601778.0.th b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601778.0.th
new file mode 100644
index 000000000..074bc0bde
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0-2601778.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0.pytrace b/test_temp/jit/r000hs/data.0/2601697-2601702.0.pytrace
new file mode 100644
index 000000000..dee4b7ed7
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0.pytrace differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0.pytrace-sym b/test_temp/jit/r000hs/data.0/2601697-2601702.0.pytrace-sym
new file mode 100644
index 000000000..9a65f0fdb
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0.pytrace-sym differ
diff --git a/test_temp/jit/r000hs/data.0/2601697-2601702.0.trace b/test_temp/jit/r000hs/data.0/2601697-2601702.0.trace
new file mode 100644
index 000000000..69691220d
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601697-2601702.0.trace differ
diff --git a/test_temp/jit/r000hs/data.0/2601702-2601717.0-2601717.0.th b/test_temp/jit/r000hs/data.0/2601702-2601717.0-2601717.0.th
new file mode 100644
index 000000000..9944a43aa
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601702-2601717.0-2601717.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601702-2601717.0.trace b/test_temp/jit/r000hs/data.0/2601702-2601717.0.trace
new file mode 100644
index 000000000..52dc0a553
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601702-2601717.0.trace differ
diff --git a/test_temp/jit/r000hs/data.0/2601702-2601717.1-2601717.0.th b/test_temp/jit/r000hs/data.0/2601702-2601717.1-2601717.0.th
new file mode 100644
index 000000000..43f11e3a1
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601702-2601717.1-2601717.0.th differ
diff --git a/test_temp/jit/r000hs/data.0/2601702-2601717.1.trace b/test_temp/jit/r000hs/data.0/2601702-2601717.1.trace
new file mode 100644
index 000000000..8e9ea30f8
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/2601702-2601717.1.trace differ
diff --git a/test_temp/jit/r000hs/data.0/pc.2601702.jit b/test_temp/jit/r000hs/data.0/pc.2601702.jit
new file mode 100644
index 000000000..a649219d6
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/pc.2601702.jit differ
diff --git a/test_temp/jit/r000hs/data.0/systemcollector-2601697-pc.sc b/test_temp/jit/r000hs/data.0/systemcollector-2601697-pc.sc
new file mode 100644
index 000000000..0df615f0c
Binary files /dev/null and b/test_temp/jit/r000hs/data.0/systemcollector-2601697-pc.sc differ
diff --git a/test_temp/jit/r000hs/r000hs.vtune b/test_temp/jit/r000hs/r000hs.vtune
new file mode 100644
index 000000000..fa6669349
--- /dev/null
+++ b/test_temp/jit/r000hs/r000hs.vtune
@@ -0,0 +1,24 @@
+
+
+
+ 1645496383
+ pc
+ linux
+ Intel® VTune™ Profiler 2022.0.0
+ 621730
+ 16
+ 8
+ 1
+ 3500000000
+ 6
+ 167
+ 1
+ 11th Gen Intel(R) Core(TM) i9-11900K @ 3.50GHz
+ avx512
+ 1645496383
+ 1645496391
+
+ hs
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/container.metadata b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/container.metadata
new file mode 100644
index 000000000..ee892ce7d
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/container.metadata
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/0 b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/0
new file mode 100644
index 000000000..6e448db5e
Binary files /dev/null and b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/0 differ
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/mapping.xml b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/mapping.xml
new file mode 100644
index 000000000..6d50be211
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/container.metadata b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/container.metadata
new file mode 100644
index 000000000..8eab066e4
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/0 b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/0
new file mode 100644
index 000000000..0760b6337
Binary files /dev/null and b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/0 differ
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/mapping.xml b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/mapping.xml
new file mode 100644
index 000000000..6d50be211
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/container.metadata b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/container.metadata
new file mode 100644
index 000000000..db418f330
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/0 b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/0
new file mode 100644
index 000000000..8c32cf2a5
Binary files /dev/null and b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/0 differ
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/mapping.xml b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/mapping.xml
new file mode 100644
index 000000000..6d50be211
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_sched_and_counter_metrics/container.metadata b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_sched_and_counter_metrics/container.metadata
new file mode 100644
index 000000000..cd56e726d
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/_cache_grouper_data4_sched_and_counter_metrics/container.metadata
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/dicer.db b/test_temp/jit/r000hs/sqlite-db/dicer.db
new file mode 100644
index 000000000..c0380b481
Binary files /dev/null and b/test_temp/jit/r000hs/sqlite-db/dicer.db differ
diff --git a/test_temp/jit/r000hs/sqlite-db/grouper.metadata b/test_temp/jit/r000hs/sqlite-db/grouper.metadata
new file mode 100644
index 000000000..c63d79a20
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/grouper.metadata
@@ -0,0 +1,82 @@
+
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+ time
+
+
+
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/offload_region_data/container.metadata b/test_temp/jit/r000hs/sqlite-db/offload_region_data/container.metadata
new file mode 100644
index 000000000..94869b4f7
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/offload_region_data/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/offload_region_operation_data/container.metadata b/test_temp/jit/r000hs/sqlite-db/offload_region_operation_data/container.metadata
new file mode 100644
index 000000000..f6c7b7410
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/offload_region_operation_data/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-645333858/instance/0/0 b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-645333858/instance/0/0
new file mode 100644
index 000000000..715454072
Binary files /dev/null and b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-645333858/instance/0/0 differ
diff --git a/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-645333858/instance/0/mapping.xml b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-645333858/instance/0/mapping.xml
new file mode 100644
index 000000000..fe912f504
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-645333858/instance/0/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-97516135/aggregated/0/0 b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-97516135/aggregated/0/0
new file mode 100644
index 000000000..76fa95772
Binary files /dev/null and b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-97516135/aggregated/0/0 differ
diff --git a/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-97516135/aggregated/0/mapping.xml b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-97516135/aggregated/0/mapping.xml
new file mode 100644
index 000000000..34890bf86
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/timelinedb/dbint-97516135/aggregated/0/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/jit/r000hs/sqlite-db/timelinedb/directory.xml b/test_temp/jit/r000hs/sqlite-db/timelinedb/directory.xml
new file mode 100644
index 000000000..d5bca712e
--- /dev/null
+++ b/test_temp/jit/r000hs/sqlite-db/timelinedb/directory.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/test_temp/jit/test_lineprofiler.py b/test_temp/jit/test_lineprofiler.py
new file mode 100644
index 000000000..f8c34a5d2
--- /dev/null
+++ b/test_temp/jit/test_lineprofiler.py
@@ -0,0 +1,37 @@
+import time
+import random
+
+def very_slow_random_generator():
+ time.sleep(5)
+ arr = [random.randint(1,100) for i in range(100000)]
+ return sum(arr) / len(arr)
+
+def slow_random_generator():
+ time.sleep(2)
+ arr = [random.randint(1,100) for i in range(100000)]
+ return sum(arr) / len(arr)
+
+def fast_random_generator():
+ time.sleep(1)
+ arr = [random.randint(1,100) for i in range(100000)]
+ return sum(arr) / len(arr)
+
+def main_func():
+ result = fast_random_generator()
+ print(result)
+
+ result = slow_random_generator()
+ print(result)
+
+ result = very_slow_random_generator()
+ print(result)
+
+from line_profiler import LineProfiler
+
+lprofiler = LineProfiler()
+
+lp_wrapper = lprofiler(main_func)
+
+lp_wrapper()
+
+lprofiler.print_stats()
diff --git a/test_temp/others/keep_running.py b/test_temp/others/keep_running.py
new file mode 100644
index 000000000..06d62ffde
--- /dev/null
+++ b/test_temp/others/keep_running.py
@@ -0,0 +1,2 @@
+for i in range(10):
+ exec(open("jit_test.py").read())
diff --git a/test_temp/others/ndarray_fill_test.py b/test_temp/others/ndarray_fill_test.py
new file mode 100644
index 000000000..4d43bdd7e
--- /dev/null
+++ b/test_temp/others/ndarray_fill_test.py
@@ -0,0 +1,13 @@
+import taichi as ti
+
+ti.init(arch=ti.cuda, log_level=ti.TRACE)
+#ti.init(arch=ti.cuda)
+
+a = ti.ndarray(float, 16)
+
+@ti.kernel
+def p():
+ print('p')
+
+
+p()
diff --git a/test_temp/others/numpy_test.py b/test_temp/others/numpy_test.py
new file mode 100644
index 000000000..06868ed50
--- /dev/null
+++ b/test_temp/others/numpy_test.py
@@ -0,0 +1,19 @@
+import taichi as ti
+import time
+
+ti.init(arch=ti.cuda)
+
+print("test to numpy")
+N = 2048*2048
+
+a = ti.ndarray(ti.f32, N)
+a.to_numpy
+
+iterations = 100000
+t_start = time.perf_counter()
+for i in range(iterations):
+ a.to_numpy
+t_used = time.perf_counter() - t_start
+print('total time:', "{:.3f}".format(t_used*1000), "ms")
+
+
diff --git a/test_temp/others/r000hs/.norun b/test_temp/others/r000hs/.norun
new file mode 100644
index 000000000..e69de29bb
diff --git a/test_temp/others/r000hs/archive/binaries/[vdso]/0e58e4148bc0ce4e23cb901a87f22850/[vdso] b/test_temp/others/r000hs/archive/binaries/[vdso]/0e58e4148bc0ce4e23cb901a87f22850/[vdso]
new file mode 100644
index 000000000..4b5b25278
Binary files /dev/null and b/test_temp/others/r000hs/archive/binaries/[vdso]/0e58e4148bc0ce4e23cb901a87f22850/[vdso] differ
diff --git a/test_temp/others/r000hs/archive/binaries/[vdso]/8de1bac5042ec43d09faecb1f06f864b/[vdso] b/test_temp/others/r000hs/archive/binaries/[vdso]/8de1bac5042ec43d09faecb1f06f864b/[vdso]
new file mode 100644
index 000000000..11d8fdb44
Binary files /dev/null and b/test_temp/others/r000hs/archive/binaries/[vdso]/8de1bac5042ec43d09faecb1f06f864b/[vdso] differ
diff --git a/test_temp/others/r000hs/config/analysis_type.cfg b/test_temp/others/r000hs/config/analysis_type.cfg
new file mode 100644
index 000000000..b46ea5487
--- /dev/null
+++ b/test_temp/others/r000hs/config/analysis_type.cfg
@@ -0,0 +1,249 @@
+
+
+
+ %HotspotsAtypeName
+ %HotspotsAtypeShortName
+ %HotspotsAtypeDescription
+ %HotspotsAtypeShortDescription
+ hs
+ hotspots
+ 1
+ 1
+ fire solid
+ configs.analysis_type-hotspots_f1101
+
+
+
+
+
+
+ %HotspotsByCPUUsageViewpointName
+
+
+ %HotspotsByCPUUsageViewpointName
+
+
+
+
+
+ config://analysis_type/include/knobs.xsl?
+
+
+ &
+
+
+
+
+
+
+ %SamplingModeDescription
+
+ hw
+ hw
+
+
+
+
+
+ false
+
+ %EnableStackCollectionDescription
+ false
+
+
+
+
+ hotspotsGroup
+
+
+ %SamplingModeDescription
+
+ sw
+ hw
+ sw
+
+
+
+
+
+ false
+
+ %EnableStackCollectionDescription
+ false
+
+
+
+
+
+
+
+ %SlowGoodFrameThresholdDescription
+ 0.01
+ 1024000
+ 40
+
+
+ %GoodFastFrameThresholdDescription
+ 0.01
+ 1024000
+ 100
+
+
+
+ true
+ %EnableCharacterizationInsightsDescription
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ stack
+ 10
+ true
+
+
+
+ java,dotnet,python
+
+
+ java,python
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,
+
+
+
+
+
+ ,
+
+
+
+
+
+ ,
+
+
+
+
+
+
+
+
+
+
+ true
+ true
+
+
+ true
+
+
+
+
+
+ true
+
+
+ runsa
+
+
+
+
+
+
+ true
+ all
+
+
+
+
+ false
+ true
+
+
+
+
+
+
+ ,
+
+
+
+
+
+ ,
+
+
+
+
+
+
+ ,
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ true
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/config/applicationOutput.txt b/test_temp/others/r000hs/config/applicationOutput.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/test_temp/others/r000hs/config/collection.cfg b/test_temp/others/r000hs/config/collection.cfg
new file mode 100644
index 000000000..3890dc8ba
--- /dev/null
+++ b/test_temp/others/r000hs/config/collection.cfg
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/config/context_values.cfg b/test_temp/others/r000hs/config/context_values.cfg
new file mode 100644
index 000000000..a9cd9ded1
--- /dev/null
+++ b/test_temp/others/r000hs/config/context_values.cfg
@@ -0,0 +1,358 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/config/log.cfg b/test_temp/others/r000hs/config/log.cfg
new file mode 100644
index 000000000..7ef63acdf
--- /dev/null
+++ b/test_temp/others/r000hs/config/log.cfg
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/config/runss.options b/test_temp/others/r000hs/config/runss.options
new file mode 100644
index 000000000..e19c3eaaa
--- /dev/null
+++ b/test_temp/others/r000hs/config/runss.options
@@ -0,0 +1,17 @@
+-r
+/home/qiao/Taichi/taichi-qiao-mem/test_temp/r000hs
+--stack-stitching
+--data-limit-mb=1000
+--disk-space-limit=0
+--mrte-type=java,python
+--stack-unwind-limit=8388608
+--itt-config=frame
+--itt-config=task,event,counter
+--stackwalk=offline
+--mrte-mode=auto
+--type=cpu:counters:nostack
+--type=cpu:stack
+--interval=10
+--
+/home/qiao/miniconda3/envs/qiao-mem/bin/python
+jit_test.py
diff --git a/test_temp/others/r000hs/config/search_dir.cfg b/test_temp/others/r000hs/config/search_dir.cfg
new file mode 100644
index 000000000..eaa5edc64
--- /dev/null
+++ b/test_temp/others/r000hs/config/search_dir.cfg
@@ -0,0 +1,19 @@
+
+
+
+
+
+ 1
+
+
+ 2
+
+
+ 3
+
+
+ 4
+
+
+
+
diff --git a/test_temp/others/r000hs/config/state.cfg b/test_temp/others/r000hs/config/state.cfg
new file mode 100644
index 000000000..1e0d48399
--- /dev/null
+++ b/test_temp/others/r000hs/config/state.cfg
@@ -0,0 +1,41 @@
+
+
+
+
+ Hotspots by CPU Utilization
+
+
+
+
+ true
+
+ CounterFunctionModule
+ 2
+
+
+
+
+
+
+
+
+ /SourceCallStack
+ true
+
+ CounterSourceFunctionSourceFile
+ 2
+
+
+
+
+
+ /Function/ParentCallStack
+ false
+ 32.5615%
+
+ CounterFunctionModule
+ 2
+
+
+
+
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322572.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322572.0.th
new file mode 100644
index 000000000..cf9afb80f
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322572.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322586.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322586.0.th
new file mode 100644
index 000000000..da78e243d
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322586.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322639.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322639.0.th
new file mode 100644
index 000000000..b6d771ec9
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322639.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322640.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322640.0.th
new file mode 100644
index 000000000..6310803fb
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322640.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322641.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322641.0.th
new file mode 100644
index 000000000..eb2967ad3
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322641.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322642.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322642.0.th
new file mode 100644
index 000000000..80d3636d5
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322642.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322643.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322643.0.th
new file mode 100644
index 000000000..4a9c44510
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322643.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322644.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322644.0.th
new file mode 100644
index 000000000..8aa0248cd
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322644.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322645.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322645.0.th
new file mode 100644
index 000000000..c4d93c2c8
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322645.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322646.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322646.0.th
new file mode 100644
index 000000000..861230521
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322646.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322647.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322647.0.th
new file mode 100644
index 000000000..4b05a03da
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322647.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322648.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322648.0.th
new file mode 100644
index 000000000..572719dbc
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322648.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322649.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322649.0.th
new file mode 100644
index 000000000..1797b4c20
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322649.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322650.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322650.0.th
new file mode 100644
index 000000000..7732b1770
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322650.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322651.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322651.0.th
new file mode 100644
index 000000000..c528277ee
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322651.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322652.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322652.0.th
new file mode 100644
index 000000000..2f6d60651
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322652.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322653.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322653.0.th
new file mode 100644
index 000000000..2d7d74648
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322653.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322654.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322654.0.th
new file mode 100644
index 000000000..3284b4142
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322654.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322655.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322655.0.th
new file mode 100644
index 000000000..948803aa0
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322655.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322656.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322656.0.th
new file mode 100644
index 000000000..f9f663495
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322656.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322657.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322657.0.th
new file mode 100644
index 000000000..df983e0f9
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322657.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322658.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322658.0.th
new file mode 100644
index 000000000..b3d551a07
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322658.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322659.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322659.0.th
new file mode 100644
index 000000000..949adc80e
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322659.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322660.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322660.0.th
new file mode 100644
index 000000000..789e6d6ac
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322660.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322661.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322661.0.th
new file mode 100644
index 000000000..95c2781fd
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322661.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322662.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322662.0.th
new file mode 100644
index 000000000..1ed74c44f
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322662.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322663.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322663.0.th
new file mode 100644
index 000000000..d4e00de28
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322663.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322664.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322664.0.th
new file mode 100644
index 000000000..d7ded495c
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322664.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322665.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322665.0.th
new file mode 100644
index 000000000..b92b4c492
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322665.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322666.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322666.0.th
new file mode 100644
index 000000000..5f4eb47d6
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322666.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322667.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322667.0.th
new file mode 100644
index 000000000..2441cdf96
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322667.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322668.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322668.0.th
new file mode 100644
index 000000000..bd8420a00
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322668.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322669.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322669.0.th
new file mode 100644
index 000000000..cdb28d54b
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322669.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322670.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322670.0.th
new file mode 100644
index 000000000..673e15c75
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322670.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0-322671.0.th b/test_temp/others/r000hs/data.0/322567-322572.0-322671.0.th
new file mode 100644
index 000000000..94dd11d94
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0-322671.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0.pytrace b/test_temp/others/r000hs/data.0/322567-322572.0.pytrace
new file mode 100644
index 000000000..dee4b7ed7
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0.pytrace differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0.pytrace-sym b/test_temp/others/r000hs/data.0/322567-322572.0.pytrace-sym
new file mode 100644
index 000000000..37de8a1af
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0.pytrace-sym differ
diff --git a/test_temp/others/r000hs/data.0/322567-322572.0.trace b/test_temp/others/r000hs/data.0/322567-322572.0.trace
new file mode 100644
index 000000000..3a203eab7
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322567-322572.0.trace differ
diff --git a/test_temp/others/r000hs/data.0/322572-322587.0-322587.0.th b/test_temp/others/r000hs/data.0/322572-322587.0-322587.0.th
new file mode 100644
index 000000000..c93c51786
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322572-322587.0-322587.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322572-322587.0.trace b/test_temp/others/r000hs/data.0/322572-322587.0.trace
new file mode 100644
index 000000000..afca0b5ed
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322572-322587.0.trace differ
diff --git a/test_temp/others/r000hs/data.0/322572-322587.1-322587.0.th b/test_temp/others/r000hs/data.0/322572-322587.1-322587.0.th
new file mode 100644
index 000000000..16ebe8b88
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322572-322587.1-322587.0.th differ
diff --git a/test_temp/others/r000hs/data.0/322572-322587.1.trace b/test_temp/others/r000hs/data.0/322572-322587.1.trace
new file mode 100644
index 000000000..3ce481faa
Binary files /dev/null and b/test_temp/others/r000hs/data.0/322572-322587.1.trace differ
diff --git a/test_temp/others/r000hs/data.0/pc.322572.jit b/test_temp/others/r000hs/data.0/pc.322572.jit
new file mode 100644
index 000000000..28435e4e5
Binary files /dev/null and b/test_temp/others/r000hs/data.0/pc.322572.jit differ
diff --git a/test_temp/others/r000hs/data.0/systemcollector-322567-pc.sc b/test_temp/others/r000hs/data.0/systemcollector-322567-pc.sc
new file mode 100644
index 000000000..a4385b56a
Binary files /dev/null and b/test_temp/others/r000hs/data.0/systemcollector-322567-pc.sc differ
diff --git a/test_temp/others/r000hs/r000hs.vtune b/test_temp/others/r000hs/r000hs.vtune
new file mode 100644
index 000000000..798d649de
--- /dev/null
+++ b/test_temp/others/r000hs/r000hs.vtune
@@ -0,0 +1,24 @@
+
+
+
+ 1644573024
+ pc
+ linux
+ Intel® VTune™ Profiler 2022.0.0
+ 621730
+ 16
+ 8
+ 1
+ 3500000000
+ 6
+ 167
+ 1
+ 11th Gen Intel(R) Core(TM) i9-11900K @ 3.50GHz
+ avx512
+ 1644573024
+ 1644573031
+
+ hs
+
+
+
diff --git a/test_temp/others/r000hs/r000hs.vtuneproj b/test_temp/others/r000hs/r000hs.vtuneproj
new file mode 100644
index 000000000..4dc9bc36c
--- /dev/null
+++ b/test_temp/others/r000hs/r000hs.vtuneproj
@@ -0,0 +1,218 @@
+
+
+ localhost
+
+ 1644573070
+ pc
+ linux
+ Intel® VTune™ Profiler 2022.0.0
+ 621730
+ 16
+ 8
+ 1
+ 3500000000
+ 6
+ 167
+ 1
+ 11th Gen Intel(R) Core(TM) i9-11900K @ 3.50GHz
+ avx512
+
+
+
+
+ launch
+
+
+
+
+
+ launch
+ /home/qiao/Taichi/taichi-qiao-mem/test_temp/r000hs/r@@@{at}
+ true
+
+
+ allowMultipleRuns
+ false
+
+
+ analyzeKvmGuest
+ false
+
+
+ analyzeSystemWide
+ false
+
+
+ cpuMask
+
+
+
+ customCollector
+
+
+
+ dataLimit
+ 1000
+
+
+ enableRing
+ false
+
+
+ finalizationMode
+ fast
+
+
+ followChild
+ true
+
+
+ followChildGroup
+
+
+
+ followChildStrategy
+
+
+
+ groupForFinalizationControl
+
+
+
+ kvmGuestKallsyms
+
+
+
+ kvmGuestModules
+
+
+
+ kvmProfileGuest
+
+
+
+ mrteMode
+ auto
+
+
+ targetDurationType
+ short
+
+
+ targetRingBuffer
+ 0
+
+
+ targetType
+ launch
+
+
+ traceMpi
+ false
+
+
+ tracingMode
+
+
+
+ wrapperScriptContent
+
+
+
+ wrapperScriptPath
+
+
+
+
+
+
+
+
+
+ /home/qiao/intel/oneapi/vtune/2022.0.0/config/analysis_type/hotspots.cfg
+
+
+
+ /home/qiao/intel/oneapi/vtune/2022.0.0/config/analysis_type/survey.cfg
+
+
+ allowedViewpoints
+ %PerfSnapshotViewpointName
+
+
+ collectMemoryBW
+ false
+
+
+ collectPCIeBW
+ false
+
+
+ dramBandwidthLimitsAT
+ true
+
+
+ initialViewpoint
+ %PerfSnapshotViewpointName
+
+
+ isUArchUsageAvailable
+ true
+
+
+ useGpuCountingMode
+ true
+
+
+
+
+ /home/qiao/intel/oneapi/vtune/2022.0.0/config/analysis_type/hotspots.cfg
+
+
+ allowedViewpoints
+ %HotspotsByCPUUsageViewpointName
+
+
+ enableCharacterizationInsights
+ true
+
+
+ enableStackCollect
+ false
+
+
+ goodFastFrameThreshold
+ 100
+
+
+ groupForCustomControl
+
+
+
+ initialViewpoint
+ %HotspotsByCPUUsageViewpointName
+
+
+ samplingInterval
+ 1
+
+
+ samplingMode
+ sw
+
+
+ slowGoodFrameThreshold
+ 40
+
+
+ stackSizeCollect
+ 1024
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/container.metadata b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/container.metadata
new file mode 100644
index 000000000..878abe9eb
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/container.metadata
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/0 b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/0
new file mode 100644
index 000000000..09af7e0f6
Binary files /dev/null and b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/0 differ
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/mapping.xml b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/mapping.xml
new file mode 100644
index 000000000..6d50be211
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_dd_thread/grouper/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/container.metadata b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/container.metadata
new file mode 100644
index 000000000..272afd737
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/0 b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/0
new file mode 100644
index 000000000..9cd172777
Binary files /dev/null and b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/0 differ
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/mapping.xml b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/mapping.xml
new file mode 100644
index 000000000..6d50be211
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_data_grouper/grouper/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/container.metadata b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/container.metadata
new file mode 100644
index 000000000..db418f330
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/0 b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/0
new file mode 100644
index 000000000..295fa8934
Binary files /dev/null and b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/0 differ
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/mapping.xml b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/mapping.xml
new file mode 100644
index 000000000..6d50be211
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_global_time_interval_metrics/grouper/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_sched_and_counter_metrics/container.metadata b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_sched_and_counter_metrics/container.metadata
new file mode 100644
index 000000000..cd56e726d
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/_cache_grouper_data4_sched_and_counter_metrics/container.metadata
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/dicer.db b/test_temp/others/r000hs/sqlite-db/dicer.db
new file mode 100644
index 000000000..0c032b11e
Binary files /dev/null and b/test_temp/others/r000hs/sqlite-db/dicer.db differ
diff --git a/test_temp/others/r000hs/sqlite-db/grouper.metadata b/test_temp/others/r000hs/sqlite-db/grouper.metadata
new file mode 100644
index 000000000..c63d79a20
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/grouper.metadata
@@ -0,0 +1,82 @@
+
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+ time
+
+
+
+
+
+
+
+
+
+
+
+
+ time
+ count
+ instanceCount
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/offload_region_data/container.metadata b/test_temp/others/r000hs/sqlite-db/offload_region_data/container.metadata
new file mode 100644
index 000000000..94869b4f7
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/offload_region_data/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/offload_region_operation_data/container.metadata b/test_temp/others/r000hs/sqlite-db/offload_region_operation_data/container.metadata
new file mode 100644
index 000000000..f6c7b7410
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/offload_region_operation_data/container.metadata
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1716639517/aggregated/0/0 b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1716639517/aggregated/0/0
new file mode 100644
index 000000000..b17d6896d
Binary files /dev/null and b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1716639517/aggregated/0/0 differ
diff --git a/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1716639517/aggregated/0/mapping.xml b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1716639517/aggregated/0/mapping.xml
new file mode 100644
index 000000000..8876237f2
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1716639517/aggregated/0/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1988985760/instance/0/0 b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1988985760/instance/0/0
new file mode 100644
index 000000000..a9a6634a6
Binary files /dev/null and b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1988985760/instance/0/0 differ
diff --git a/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1988985760/instance/0/mapping.xml b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1988985760/instance/0/mapping.xml
new file mode 100644
index 000000000..84f15414a
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/timelinedb/dbint-1988985760/instance/0/mapping.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test_temp/others/r000hs/sqlite-db/timelinedb/directory.xml b/test_temp/others/r000hs/sqlite-db/timelinedb/directory.xml
new file mode 100644
index 000000000..0786dd145
--- /dev/null
+++ b/test_temp/others/r000hs/sqlite-db/timelinedb/directory.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/test_temp/others/stream_demo.py b/test_temp/others/stream_demo.py
new file mode 100644
index 000000000..e58dd77ee
--- /dev/null
+++ b/test_temp/others/stream_demo.py
@@ -0,0 +1,58 @@
+import taichi as ti
+import time
+
+ti.init(arch=ti.cuda)
+
+dim, n_grid, steps, dt = 3, 128, 25, 8e-5
+
+n_particles = n_grid**dim // 2**(dim - 1)
+dx = 1 / n_grid
+
+p_rho = 1
+p_vol = (dx * 0.5)**2
+p_mass = p_vol * p_rho
+gravity = 9.8
+bound = 3
+E = 400
+
+x = ti.Vector.field(dim, float, n_particles)
+v = ti.Vector.field(dim, float, n_particles)
+C = ti.Matrix.field(dim, dim, float, n_particles)
+J = ti.field(float, n_particles)
+
+grid_v = ti.Vector.field(dim, float, (n_grid, ) * dim)
+grid_m = ti.field(float, (n_grid, ) * dim)
+
+neighbour = (3, ) * dim
+
+
+@ti.kernel
+def k1():
+ for I in ti.grouped(grid_m):
+ grid_v[I] = ti.zero(grid_v[I])
+ grid_m[I] = 0
+
+@ti.kernel
+def k2():
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * 4 * E * p_vol * (J[p] - 1) / dx**2
+ affine = ti.Matrix.identity(float, dim) * stress + p_mass * C[p]
+ for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
+ dpos = (offset - fx) * dx
+ weight = 1.0
+ for i in ti.static(range(dim)):
+ weight *= w[offset[i]][i]
+ grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
+ grid_m[base + offset] += weight * p_mass
+
+
+t_start = time.perf_counter()
+k1()
+k2()
+t_used = time.perf_counter() - t_start
+print('execution time {:.3f}'.format(t_used*1000), "ms")
+
diff --git a/test_temp/others/test_mesh.py b/test_temp/others/test_mesh.py
new file mode 100644
index 000000000..b29ffbdb6
--- /dev/null
+++ b/test_temp/others/test_mesh.py
@@ -0,0 +1,28 @@
+import os
+import numpy as np
+import taichi as ti
+
+this_dir = os.path.dirname(os.path.abspath(__file__))
+model_file_path = os.path.join(this_dir, 'ell.json')
+
+ti.init(arch=ti.x64)
+
+def test_mesh_for(cell_reorder=False, vert_reorder=False, extra_tests=True):
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'t': ti.i32}, reorder=vert_reorder)
+ mesh_builder.cells.place({'t': ti.i32}, reorder=cell_reorder)
+ mesh_builder.cells.link(mesh_builder.verts)
+ mesh_builder.verts.link(mesh_builder.cells)
+ mesh_builder.cells.link(mesh_builder.cells)
+ mesh_builder.verts.link(mesh_builder.verts)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+
+ @ti.kernel
+ def cell_vert():
+ for c in model.cells:
+ for j in range(c.verts.size):
+ c.t += c.verts[j].id
+
+ cell_vert()
+
+test_mesh_for(False, False)
diff --git a/test_temp/others/test_ndarray_vulkan.py b/test_temp/others/test_ndarray_vulkan.py
new file mode 100644
index 000000000..45f31bb3c
--- /dev/null
+++ b/test_temp/others/test_ndarray_vulkan.py
@@ -0,0 +1,9 @@
+import taichi as ti
+
+ti.init(arch=ti.vulkan)
+
+a = ti.ndarray(float, 8)
+
+a[1] = 0.1
+
+print(a[1])
diff --git a/tests/_python_orig/__init__.py b/tests/_python_orig/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/tests/_python_orig/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/_python_orig/bls_test_template.py b/tests/_python_orig/bls_test_template.py
new file mode 100644
index 000000000..879b27981
--- /dev/null
+++ b/tests/_python_orig/bls_test_template.py
@@ -0,0 +1,288 @@
+import random
+
+import numpy as np
+
+import taichi as ti
+
+
+def bls_test_template(dim,
+ N,
+ bs,
+ stencil,
+ block_dim=None,
+ scatter=False,
+ benchmark=0,
+ dense=False):
+ x, y, y2 = ti.field(ti.i32), ti.field(ti.i32), ti.field(ti.i32)
+
+ index = ti.axes(*range(dim))
+ mismatch = ti.field(ti.i32, shape=())
+
+ if not isinstance(bs, (tuple, list)):
+ bs = [bs for _ in range(dim)]
+
+ grid_size = [N // bs[i] for i in range(dim)]
+
+ if dense:
+ create_block = lambda: ti.root.dense(index, grid_size)
+ else:
+ create_block = lambda: ti.root.pointer(index, grid_size)
+
+ if scatter:
+ block = create_block()
+
+ block.dense(index, bs).place(x)
+ block.dense(index, bs).place(y)
+ block.dense(index, bs).place(y2)
+ else:
+ create_block().dense(index, bs).place(x)
+ create_block().dense(index, bs).place(y)
+ create_block().dense(index, bs).place(y2)
+
+ ndrange = ((bs[i], N - bs[i]) for i in range(dim))
+
+ if block_dim is None:
+ block_dim = 1
+ for i in range(dim):
+ block_dim *= bs[i]
+
+ @ti.kernel
+ def populate():
+ for I in ti.grouped(ti.ndrange(*ndrange)):
+ s = 0
+ for i in ti.static(range(dim)):
+ s += I[i]**(i + 1)
+ x[I] = s
+
+ @ti.kernel
+ def apply(use_bls: ti.template(), y: ti.template()):
+ if ti.static(use_bls and not scatter):
+ ti.block_local(x)
+ if ti.static(use_bls and scatter):
+ ti.block_local(y)
+
+ ti.block_dim(block_dim)
+ for I in ti.grouped(x):
+ if ti.static(scatter):
+ for offset in ti.static(stencil):
+ y[I + ti.Vector(offset)] += x[I]
+ else:
+ # gather
+ s = 0
+ for offset in ti.static(stencil):
+ s = s + x[I + ti.Vector(offset)]
+ y[I] = s
+
+ populate()
+
+ if benchmark:
+ for i in range(benchmark):
+ x.snode.parent().deactivate_all()
+ if not scatter:
+ populate()
+ y.snode.parent().deactivate_all()
+ y2.snode.parent().deactivate_all()
+ apply(False, y2)
+ apply(True, y)
+ else:
+ # Simply test
+ apply(False, y2)
+ apply(True, y)
+
+ @ti.kernel
+ def check():
+ for I in ti.grouped(y2):
+ if y[I] != y2[I]:
+ print('check failed', I, y[I], y2[I])
+ mismatch[None] = 1
+
+ check()
+
+ ti.print_kernel_profile_info()
+
+ assert mismatch[None] == 0
+
+
+def bls_particle_grid(N,
+ ppc=8,
+ block_size=16,
+ scatter=True,
+ benchmark=0,
+ pointer_level=1,
+ sort_points=True,
+ use_offset=True):
+ M = N * N * ppc
+
+ m1 = ti.field(ti.f32)
+ m2 = ti.field(ti.f32)
+ m3 = ti.field(ti.f32)
+ pid = ti.field(ti.i32)
+ err = ti.field(ti.i32, shape=())
+
+ max_num_particles_per_block = block_size**2 * 4096
+
+ x = ti.Vector.field(2, dtype=ti.f32)
+
+ s1 = ti.field(dtype=ti.f32)
+ s2 = ti.field(dtype=ti.f32)
+ s3 = ti.field(dtype=ti.f32)
+
+ ti.root.dense(ti.i, M).place(x)
+ ti.root.dense(ti.i, M).place(s1, s2, s3)
+
+ if pointer_level == 1:
+ block = ti.root.pointer(ti.ij, N // block_size)
+ elif pointer_level == 2:
+ block = ti.root.pointer(ti.ij, N // block_size // 4).pointer(ti.ij, 4)
+ else:
+ raise ValueError('pointer_level must be 1 or 2')
+
+ if use_offset:
+ grid_offset = (-N // 2, -N // 2)
+ grid_offset_block = (-N // 2 // block_size, -N // 2 // block_size)
+ world_offset = -0.5
+ else:
+ grid_offset = (0, 0)
+ grid_offset_block = (0, 0)
+ world_offset = 0
+
+ block.dense(ti.ij, block_size).place(m1, offset=grid_offset)
+ block.dense(ti.ij, block_size).place(m2, offset=grid_offset)
+ block.dense(ti.ij, block_size).place(m3, offset=grid_offset)
+
+ block.dynamic(ti.l,
+ max_num_particles_per_block,
+ chunk_size=block_size**2 * ppc * 4).place(
+ pid, offset=grid_offset_block + (0, ))
+
+ bound = 0.1
+
+ extend = 4
+
+ x_ = [(random.random() * (1 - 2 * bound) + bound + world_offset,
+ random.random() * (1 - 2 * bound) + bound + world_offset)
+ for _ in range(M)]
+ if sort_points:
+ x_.sort(key=lambda q: int(q[0] * N) // block_size * N + int(q[1] * N)
+ // block_size)
+
+ x.from_numpy(np.array(x_, dtype=np.float32))
+
+ @ti.kernel
+ def insert():
+ ti.block_dim(256)
+ for i in x:
+ # It is important to ensure insert and p2g uses the exact same way to compute the base
+ # coordinates. Otherwise there might be coordinate mismatch due to float-point errors.
+ base = ti.Vector([
+ int(ti.floor(x[i][0] * N) - grid_offset[0]),
+ int(ti.floor(x[i][1] * N) - grid_offset[1])
+ ])
+ base_p = ti.rescale_index(m1, pid, base)
+ ti.append(pid.parent(), base_p, i)
+
+ scatter_weight = (N * N / M) * 0.01
+
+ @ti.kernel
+ def p2g(use_shared: ti.template(), m: ti.template()):
+ ti.block_dim(256)
+ if ti.static(use_shared):
+ ti.block_local(m)
+ for I in ti.grouped(pid):
+ p = pid[I]
+
+ u_ = ti.floor(x[p] * N).cast(ti.i32)
+ Im = ti.rescale_index(pid, m, I)
+ u0 = ti.assume_in_range(u_[0], Im[0], 0, 1)
+ u1 = ti.assume_in_range(u_[1], Im[1], 0, 1)
+
+ u = ti.Vector([u0, u1])
+
+ for offset in ti.static(ti.grouped(ti.ndrange(extend, extend))):
+ m[u + offset] += scatter_weight
+
+ @ti.kernel
+ def p2g_naive():
+ ti.block_dim(256)
+ for p in x:
+ u = ti.floor(x[p] * N).cast(ti.i32)
+
+ for offset in ti.static(ti.grouped(ti.ndrange(extend, extend))):
+ m3[u + offset] += scatter_weight
+
+ @ti.kernel
+ def fill_m1():
+ for i, j in ti.ndrange(N, N):
+ m1[i, j] = ti.random()
+
+ @ti.kernel
+ def g2p(use_shared: ti.template(), s: ti.template()):
+ ti.block_dim(256)
+ if ti.static(use_shared):
+ ti.block_local(m1)
+ for I in ti.grouped(pid):
+ p = pid[I]
+
+ u_ = ti.floor(x[p] * N).cast(ti.i32)
+
+ Im = ti.rescale_index(pid, m1, I)
+ u0 = ti.assume_in_range(u_[0], Im[0], 0, 1)
+ u1 = ti.assume_in_range(u_[1], Im[1], 0, 1)
+
+ u = ti.Vector([u0, u1])
+
+ tot = 0.0
+
+ for offset in ti.static(ti.grouped(ti.ndrange(extend, extend))):
+ tot += m1[u + offset]
+
+ s[p] = tot
+
+ @ti.kernel
+ def g2p_naive(s: ti.template()):
+ ti.block_dim(256)
+ for p in x:
+ u = ti.floor(x[p] * N).cast(ti.i32)
+
+ tot = 0.0
+ for offset in ti.static(ti.grouped(ti.ndrange(extend, extend))):
+ tot += m1[u + offset]
+ s[p] = tot
+
+ insert()
+
+ for i in range(benchmark):
+ pid.parent(2).snode.deactivate_all()
+ insert()
+
+ @ti.kernel
+ def check_m():
+ for i in range(grid_offset[0], grid_offset[0] + N):
+ for j in range(grid_offset[1], grid_offset[1] + N):
+ if abs(m1[i, j] - m3[i, j]) > 1e-4:
+ err[None] = 1
+ if abs(m2[i, j] - m3[i, j]) > 1e-4:
+ err[None] = 1
+
+ @ti.kernel
+ def check_s():
+ for i in range(M):
+ if abs(s1[i] - s2[i]) > 1e-4:
+ err[None] = 1
+ if abs(s1[i] - s3[i]) > 1e-4:
+ err[None] = 1
+
+ if scatter:
+ for i in range(max(benchmark, 1)):
+ p2g(True, m1)
+ p2g(False, m2)
+ p2g_naive()
+ check_m()
+ else:
+ for i in range(max(benchmark, 1)):
+ g2p(True, s1)
+ g2p(False, s2)
+ g2p_naive(s3)
+ check_s()
+
+ assert not err[None]
diff --git a/tests/_python_orig/ell.json b/tests/_python_orig/ell.json
new file mode 100644
index 000000000..e1e560189
--- /dev/null
+++ b/tests/_python_orig/ell.json
@@ -0,0 +1,120 @@
+{
+"num_patches" : 8,
+ "elements" : [
+{"order" : 0,
+"num" : 20,
+"max_num_per_patch" : 32,
+"owned_offsets" : [0,0,3,5,7,10,12,14,20],
+"total_offsets" : [0,20,40,60,80,100,120,140,160],
+"l2g_mapping" : [12,4,0,3,13,1,5,11,15,9,2,8,7,10,19,14,6,17,16,18,2,5,10,13,1,4,9,12,0,3,15,7,8,11,17,19,6,14,16,18,1,13,12,4,0,9,5,2,10,3,8,15,11,17,19,7,6,14,16,18,4,7,12,0,3,13,1,5,15,6,9,8,11,17,19,14,2,10,16,18,16,17,18,19,11,12,14,15,3,0,8,4,9,13,1,6,7,5,10,2,8,9,12,0,11,1,13,10,4,3,15,17,19,16,18,14,5,2,7,6,0,12,4,3,9,8,1,11,13,15,17,19,5,7,6,14,10,2,16,18,3,6,11,14,15,19,12,4,0,7,16,18,8,17,9,13,1,5,10,2],
+"g2r_mapping" : [12,3,0,14,5,1,15,6,10,11,2,16,13,4,17,18,7,8,9,19],
+"l2r_mapping" : [13,5,12,14,4,3,1,16,18,11,0,10,6,2,19,17,15,8,7,9,0,1,2,4,3,5,11,13,12,14,18,6,10,16,8,19,15,17,7,9,3,4,13,5,12,11,1,0,2,14,10,18,16,8,19,6,15,17,7,9,5,6,13,12,14,4,3,1,18,15,11,10,16,8,19,17,0,2,7,9,7,8,9,19,16,13,17,18,14,12,10,5,11,4,3,15,6,1,2,0,10,11,13,12,16,3,4,2,5,14,18,8,19,7,9,17,1,0,6,15,12,13,5,14,11,10,3,16,4,18,8,19,1,6,15,17,2,0,7,9,14,15,16,17,18,19,13,5,12,6,7,9,10,8,11,4,3,1,2,0]
+}
+,{"order" : 2,
+"num" : 66,
+"max_num_per_patch" : 96,
+"owned_offsets" : [0,8,17,23,31,42,48,56,66],
+"total_offsets" : [0,66,132,198,264,330,396,462,528],
+"l2g_mapping" : [0,3,8,10,11,15,17,18,1,2,9,16,14,43,44,45,27,25,26,20,12,19,7,13,34,32,33,4,48,49,53,54,47,24,46,37,35,36,5,6,39,38,64,57,63,42,55,56,21,22,23,60,58,59,65,50,40,41,62,30,61,52,29,51,31,28,16,32,33,34,35,36,37,38,39,15,17,8,9,11,19,20,10,12,13,14,18,0,2,3,25,27,46,47,5,6,7,1,4,26,44,45,49,53,57,58,60,64,43,48,21,23,24,41,42,54,55,22,40,56,63,65,28,29,30,50,51,59,61,62,31,52,9,12,13,14,19,20,8,10,15,16,18,32,33,35,36,38,11,17,34,37,39,1,2,3,4,6,7,26,27,44,45,49,53,57,58,60,64,0,25,46,47,5,43,48,21,23,24,41,42,54,55,22,40,56,63,65,28,29,30,50,51,59,61,62,31,52,2,22,23,24,25,27,46,47,0,3,8,10,11,15,17,18,1,4,6,7,9,12,14,20,26,44,45,49,53,57,58,60,64,5,13,43,48,21,41,42,54,55,16,19,32,34,35,37,39,33,36,38,40,56,63,65,28,29,30,50,51,59,61,62,31,52,28,29,30,31,51,52,58,59,60,61,62,50,57,63,64,65,43,44,45,48,49,53,54,55,56,1,2,3,4,6,7,9,10,11,12,14,20,26,27,40,41,42,21,22,24,25,47,0,23,46,5,13,18,8,15,17,19,36,37,16,32,34,35,39,33,38,4,5,6,7,48,49,12,13,19,20,36,37,1,2,3,9,10,11,14,26,27,44,45,53,57,58,60,64,0,18,43,28,29,30,50,51,54,55,56,59,61,63,8,15,16,32,33,35,38,17,34,39,25,46,47,21,23,24,41,42,22,40,65,62,31,52,1,26,43,44,45,53,57,64,0,3,4,5,7,13,14,18,48,2,6,9,10,11,12,20,27,49,58,60,8,15,17,25,46,47,21,23,24,41,42,54,55,19,36,37,16,32,33,35,38,28,29,30,50,51,56,59,61,63,34,39,22,40,65,62,31,52,21,40,41,42,50,54,55,56,63,65,0,1,2,23,24,25,26,43,45,46,22,28,29,30,44,48,49,51,53,57,58,59,61,52,27,47,64,31,60,62,3,4,6,7,9,10,11,12,14,20,8,15,17,18,5,13,19,36,37,16,32,34,35,39,33,38],
+"g2r_mapping" : [0,48,23,1,42,43,44,45,2,17,3,4,18,19,20,5,8,6,7,21,22,56,24,25,26,27,49,28,31,32,33,34,9,10,11,12,13,14,15,16,57,58,59,50,51,52,29,30,46,47,60,35,36,53,61,62,63,54,37,38,39,40,41,64,55,65],
+"l2r_mapping" : [0,1,2,3,4,5,6,7,48,23,17,8,20,50,51,52,28,27,49,22,18,21,45,19,11,9,10,42,46,47,53,61,30,26,29,14,12,13,43,44,16,15,55,54,64,59,62,63,56,24,25,39,37,38,65,60,57,58,41,33,40,36,32,35,34,31,8,9,10,11,12,13,14,15,16,5,6,2,17,4,21,22,3,18,19,20,7,0,23,1,27,28,29,30,43,44,45,48,42,49,51,52,47,53,54,37,39,55,50,46,56,25,26,58,59,61,62,24,57,63,64,65,31,32,33,60,35,38,40,41,34,36,17,18,19,20,21,22,2,3,5,8,7,9,10,12,13,15,4,6,11,14,16,48,23,1,42,44,45,49,28,51,52,47,53,54,37,39,55,0,27,29,30,43,50,46,56,25,26,58,59,61,62,24,57,63,64,65,31,32,33,60,35,38,40,41,34,36,23,24,25,26,27,28,29,30,0,1,2,3,4,5,6,7,48,42,44,45,17,18,20,22,49,51,52,47,53,54,37,39,55,43,19,50,46,56,58,59,61,62,8,21,9,11,12,14,16,10,13,15,57,63,64,65,31,32,33,60,35,38,40,41,34,36,31,32,33,34,35,36,37,38,39,40,41,60,54,64,55,65,50,51,52,46,47,53,61,62,63,48,23,1,42,44,45,17,3,4,18,20,22,49,28,57,58,59,56,24,26,27,30,0,25,29,43,19,7,2,5,6,21,13,14,8,9,11,12,16,10,15,42,43,44,45,46,47,18,19,21,22,13,14,48,23,1,17,3,4,20,49,28,51,52,53,54,37,39,55,0,7,50,31,32,33,60,35,61,62,63,38,40,64,2,5,8,9,10,12,15,6,11,16,27,29,30,56,25,26,58,59,24,57,65,41,34,36,48,49,50,51,52,53,54,55,0,1,42,43,45,19,20,7,46,23,44,17,3,4,18,22,28,47,37,39,2,5,6,27,29,30,56,25,26,58,59,61,62,21,13,14,8,9,10,12,15,31,32,33,60,35,63,38,40,64,11,16,24,57,65,41,34,36,56,57,58,59,60,61,62,63,64,65,0,48,23,25,26,27,49,50,52,29,24,31,32,33,51,46,47,35,53,54,37,38,40,36,28,30,55,34,39,41,1,42,44,45,17,3,4,18,20,22,2,5,6,7,43,19,21,13,14,8,9,11,12,16,10,15]
+}
+,{"order" : 1,
+"num" : 61,
+"max_num_per_patch" : 64,
+"owned_offsets" : [0,4,13,19,26,36,42,49,61],
+"total_offsets" : [0,61,122,183,244,305,366,427,488],
+"l2g_mapping" : [3,13,14,19,0,1,2,4,5,11,12,15,18,20,16,46,47,48,28,29,25,6,21,17,7,36,37,38,9,10,50,54,23,49,22,39,40,41,8,42,58,60,32,44,43,53,24,26,27,55,56,57,52,45,59,30,34,51,31,35,33,18,20,36,37,38,39,40,41,42,19,11,13,15,21,12,14,16,17,0,3,4,29,49,6,7,8,1,2,9,28,46,56,58,5,10,48,22,25,26,43,47,23,27,44,54,60,24,50,32,34,35,53,57,55,59,30,31,52,45,51,33,11,12,15,16,17,21,14,20,38,41,13,18,37,40,0,1,2,6,9,28,46,56,58,3,4,19,29,49,5,7,10,48,8,39,36,42,22,25,26,43,47,50,23,27,44,54,60,32,34,35,53,57,55,59,30,31,52,24,45,51,33,0,4,22,23,24,29,49,3,13,14,19,1,2,6,9,11,12,28,46,56,58,5,7,10,16,48,25,26,43,47,15,18,21,37,40,17,20,38,41,36,27,44,54,60,45,8,39,50,32,34,35,53,57,55,59,30,31,52,51,42,33,30,31,33,34,35,51,55,56,57,59,32,52,58,60,46,47,48,50,53,54,0,1,2,6,9,11,12,28,43,44,45,23,25,27,29,4,5,22,26,3,7,10,16,8,13,14,19,49,17,21,39,15,18,37,40,20,38,41,24,36,42,6,7,8,9,10,50,17,21,39,0,1,2,11,12,28,46,56,58,3,5,16,48,32,34,35,47,53,54,57,14,15,20,38,41,13,18,37,40,42,4,19,29,49,22,25,26,43,23,27,44,60,55,59,30,31,52,33,51,45,36,24,1,2,5,28,46,48,58,3,7,10,16,0,6,9,11,12,56,4,13,14,19,29,49,22,25,26,43,47,8,17,21,39,50,15,20,38,41,32,34,35,53,54,57,18,37,40,23,27,44,60,55,59,30,31,52,36,24,45,51,42,33,25,26,27,32,43,44,45,47,52,53,54,60,2,4,5,22,24,34,35,46,48,50,57,51,23,28,29,30,31,55,58,0,1,6,9,11,12,56,3,13,14,19,49,7,10,16,33,59,8,17,21,39,15,18,37,40,20,38,41,36,42],
+"g2r_mapping" : [19,42,43,0,20,44,36,37,38,39,40,13,14,1,2,15,16,17,4,3,5,18,21,22,23,49,50,51,45,24,26,27,52,28,29,30,6,7,8,9,10,11,12,53,54,55,46,56,47,25,41,31,57,58,59,32,33,34,48,35,60],
+"l2r_mapping" : [0,1,2,3,19,42,43,20,44,13,14,15,4,5,16,46,56,47,45,24,49,36,18,17,37,6,7,8,39,40,41,59,22,25,21,9,10,11,38,12,48,60,52,54,53,58,23,50,51,32,33,34,57,55,35,26,29,31,27,30,28,4,5,6,7,8,9,10,11,12,3,13,1,15,18,14,2,16,17,19,0,20,24,25,36,37,38,42,43,39,45,46,33,48,44,40,47,21,49,50,53,56,22,51,54,59,60,23,41,52,29,30,58,34,32,35,26,27,57,55,31,28,13,14,15,16,17,18,2,5,8,11,1,4,7,10,19,42,43,36,39,45,46,33,48,0,20,3,24,25,44,37,40,47,38,9,6,12,21,49,50,53,56,41,22,51,54,59,60,52,29,30,58,34,32,35,26,27,57,23,55,31,28,19,20,21,22,23,24,25,0,1,2,3,42,43,36,39,13,14,45,46,33,48,44,37,40,16,47,49,50,53,56,15,4,18,7,10,17,5,8,11,6,51,54,59,60,55,38,9,41,52,29,30,58,34,32,35,26,27,57,31,12,28,26,27,28,29,30,31,32,33,34,35,52,57,48,60,46,56,47,41,58,59,19,42,43,36,39,13,14,45,53,54,55,22,49,51,24,20,44,21,50,0,37,40,16,38,1,2,3,25,17,18,9,15,4,7,10,5,8,11,23,6,12,36,37,38,39,40,41,17,18,9,19,42,43,13,14,45,46,33,48,0,44,16,47,52,29,30,56,58,59,34,2,15,5,8,11,1,4,7,10,12,20,3,24,25,21,49,50,53,22,51,54,60,32,35,26,27,57,28,31,55,6,23,42,43,44,45,46,47,48,0,37,40,16,19,36,39,13,14,33,20,1,2,3,24,25,21,49,50,53,56,38,17,18,9,41,15,5,8,11,52,29,30,58,59,34,4,7,10,22,51,54,60,32,35,26,27,57,6,23,55,31,12,28,49,50,51,52,53,54,55,56,57,58,59,60,43,20,44,21,23,29,30,46,47,41,34,31,22,45,24,26,27,32,48,19,42,36,39,13,14,33,0,1,2,3,25,37,40,16,28,35,38,17,18,9,15,4,7,10,5,8,11,6,12]
+}
+,{"order" : 3,
+"num" : 24,
+"max_num_per_patch" : 32,
+"owned_offsets" : [0,4,7,9,12,16,18,21,24],
+"total_offsets" : [0,24,48,72,96,120,144,168,192],
+"l2g_mapping" : [0,2,4,5,14,8,6,3,10,16,18,15,11,1,12,22,19,7,20,23,13,21,17,9,10,11,12,4,6,2,3,5,1,0,16,14,8,18,15,22,19,7,20,23,13,21,17,9,3,6,1,2,5,11,16,4,0,12,14,10,8,18,15,22,19,7,20,23,13,21,17,9,7,8,15,0,13,18,5,14,19,22,2,3,16,23,20,4,6,1,17,21,10,11,9,12,9,17,20,21,23,22,19,18,13,8,14,7,0,15,16,5,1,2,3,4,6,10,11,12,1,16,3,14,6,5,0,18,2,11,8,22,19,4,12,15,20,23,13,10,7,21,17,9,14,18,22,0,8,16,19,20,23,5,15,1,13,21,17,2,3,7,9,4,6,10,11,12,13,19,23,7,17,18,22,15,9,8,14,20,21,0,16,5,1,2,3,4,6,10,11,12],
+"g2r_mapping" : [0,16,1,7,2,3,8,9,10,12,4,5,6,21,18,11,17,13,19,22,14,15,20,23],
+"l2r_mapping" : [0,1,2,3,18,10,8,7,4,17,19,11,5,16,6,20,22,9,14,23,21,15,13,12,4,5,6,2,8,1,7,3,16,0,17,18,10,19,11,20,22,9,14,23,21,15,13,12,7,8,16,1,3,5,17,2,0,6,18,4,10,19,11,20,22,9,14,23,21,15,13,12,9,10,11,0,21,19,3,18,22,20,1,7,17,23,14,2,8,16,13,15,4,5,12,6,12,13,14,15,23,20,22,19,21,10,18,9,0,11,17,3,16,1,7,2,8,4,5,6,16,17,7,18,8,3,0,19,1,5,10,20,22,2,6,11,14,23,21,4,9,15,13,12,18,19,20,0,10,17,22,14,23,3,11,16,21,15,13,1,7,9,12,2,8,4,5,6,21,22,23,9,13,19,20,11,12,10,18,14,15,0,17,3,16,1,7,2,8,4,5,6]
+}
+ ],
+ "relations" : [
+{"from_order" : 0,
+"to_order" : 0,
+"offset" : [0,0,4,8,12,12,20,27,27,35,39,39,43,47,51,51,55,61,61,68,79,79,87,91,101,107,115,122],
+"value" : [1,3,4,2,3,5,4,0,6,3,4,0,2,3,1,4,5,6,7,8,2,3,0,6,5,7,8,2,3,4,5,6,7,8,1,4,8,9,0,3,2,4,1,3,5,4,0,3,0,4,6,1,2,3,4,2,3,0,5,6,7,1,2,3,4,5,6,7,2,0,3,4,5,8,6,9,7,10,11,6,7,8,9,4,1,3,2,9,0,4,3,5,10,11,6,0,8,12,3,4,13,0,4,1,11,5,2,9,0,1,6,7,3,2,5,10,11,2,3,13,6,4]
+}
+,{"from_order" : 0,
+"to_order" : 1,
+"offset" : [0,0,4,8,12,12,20,27,27,35,39,39,43,47,51,51,55,61,61,68,79,79,87,91,101,107,115,122],
+"value" : [2,3,4,8,0,9,1,2,5,6,7,8,1,6,2,3,4,7,8,9,0,10,2,11,5,12,13,0,7,1,8,9,10,5,6,2,3,4,6,0,2,3,9,6,7,8,9,1,2,4,5,2,3,4,5,0,1,2,6,7,8,0,7,2,8,9,10,5,11,0,1,12,13,14,15,3,4,16,6,12,13,14,15,0,1,4,7,16,1,2,6,3,17,18,19,7,20,21,9,10,22,4,5,6,23,8,9,24,0,2,25,26,5,10,11,27,28,3,8,29,30,11]
+}
+,{"from_order" : 0,
+"to_order" : 2,
+"offset" : [0,0,5,10,15,15,30,42,42,55,60,60,65,70,75,75,80,89,89,101,123,123,138,143,162,171,186,198],
+"value" : [1,2,3,7,8,9,0,10,2,3,4,5,6,7,8,6,0,7,1,2,3,8,9,10,4,11,12,13,14,15,6,0,16,9,17,4,5,11,18,13,19,20,8,0,9,10,11,12,13,14,15,4,5,6,7,1,2,3,6,7,0,2,3,9,10,6,7,8,9,10,0,1,3,4,5,0,1,2,4,5,1,2,3,6,7,8,9,10,11,8,0,9,10,11,12,13,14,15,2,3,16,0,17,9,10,18,12,19,20,21,22,14,23,1,24,3,4,25,5,6,26,27,7,10,11,12,0,13,14,15,16,2,3,17,18,19,5,6,0,20,13,1,2,21,22,23,17,24,18,25,26,4,27,28,5,6,7,29,30,31,32,8,1,2,3,4,27,33,6,7,9,0,20,14,15,16,34,1,3,35,28,5,7,8,36,9,22,23,37,4,33,29,31,38,39,8,36,9]
+}
+,{"from_order" : 3,
+"to_order" : 3,
+"offset" : [0,3,6,8,11,11,13,15,17,17,20,23,23,25,28,30,30,32,34,36,38,38,40,42,42,45,49,52,52,54,57,60],
+"value" : [3,4,5,2,6,3,1,8,0,1,7,3,2,4,2,0,1,2,1,4,3,0,5,2,4,3,2,5,0,1,1,3,0,4,5,3,0,2,2,1,3,0,3,5,1,4,0,2,6,1,7,8,1,3,0,5,2,4,1,6]
+}
+,{"from_order" : 3,
+"to_order" : 2,
+"value" : [1,0,8,9,4,2,10,3,6,2,5,11,1,3,12,7,8,13,14,15,16,9,17,18,19,10,20,21,22,20,23,12,24,11,25,26,14,27,28,29,18,15,30,31,32,33,17,34,35,21,36,37,22,27,38,39,40,25,36,41,42,30,43,44,45,31,46,47,33,48,49,50,51,43,52,53,54,55,47,44,45,48,56,57,58,59,53,60,61,62,55,63,64,65,62,59,3,0,1,2,6,14,4,5,8,1,4,7,10,11,9,0,15,12,17,14,13,11,12,16,30,17,18,19,23,16,19,20,30,32,28,29,23,21,31,22,34,32,43,36,31,42,34,35,25,22,24,33,33,35,37,49,27,46,24,26,41,37,38,54,48,49,50,53,46,44,51,45,40,38,39,61,55,59,53,54,48,44,52,47,63,58,61,62,65,57,59,60,64,56,57,58,26,1,2,3,5,0,1,4,26,24,41,25,16,6,0,7,23,7,3,10,19,4,13,14,29,24,43,31,17,6,8,9,23,37,21,22,20,11,13,15,21,42,29,30,18,9,11,12,28,22,38,27,27,30,32,49,40,46,38,39,36,32,33,54,48,49,50,53,46,44,51,45,35,33,34,61,55,59,53,54,48,44,52,47,63,58,61,62,65,57,59,60,64,56,57,58,3,37,1,2,5,0,4,24,7,3,4,6,9,8,16,0,39,37,52,38,24,26,28,40,9,11,22,15,16,35,25,26,39,40,41,53,32,28,29,54,12,10,20,11,19,21,34,22,25,17,36,27,55,59,53,54,31,29,30,61,14,10,13,42,23,20,21,43,19,17,33,18,65,57,59,60,63,58,61,62,45,42,44,49,47,43,46,50,64,56,57,58,48,44,46,51,3,0,1,2,5,1,11,4,8,12,6,7,10,2,7,9,15,11,24,13,14,21,12,13,41,22,23,24,37,18,21,22,41,42,39,40,38,26,45,37,25,16,17,18,44,42,43,48,27,47,25,26,46,44,45,49,17,28,19,20,27,32,35,52,30,28,50,29,33,53,31,32,30,34,51,35,55,53,54,59,36,31,34,56,61,59,60,64,58,56,62,57,63,60,62,65,3,0,1,2,21,0,4,5,3,6,7,18,12,30,21,22,9,15,6,8,14,16,18,29,14,28,12,13,19,22,23,36,17,42,15,16,11,8,47,10,20,13,52,19,27,23,24,41,59,36,37,38,49,42,43,44,51,45,47,48,54,57,52,53,26,24,25,39,62,34,38,41,59,55,61,58,50,44,45,46,57,55,60,56,63,33,39,40,65,32,34,35,64,31,32,33,0,2,3,4,1,4,5,39,7,5,6,57,9,8,0,17,24,17,31,1,3,10,16,25,38,39,40,54,27,6,26,55,62,52,54,57,9,20,14,15,33,36,31,32,12,10,11,18,38,34,61,37,63,51,55,56,65,50,52,53,21,28,19,20,12,22,13,14,36,34,60,35,64,49,50,51,30,28,29,44,23,19,22,41,58,44,45,46,43,41,47,42,59,45,47,48,3,0,1,2,3,5,6,7,9,4,7,8,14,0,20,13,33,22,4,27,16,18,28,5,36,28,29,8,35,14,1
5,19,37,21,22,23,34,12,15,16,11,17,24,18,38,29,30,31,39,23,31,32,40,10,11,12,24,41,25,26,40,45,48,53,43,41,54,42,46,50,44,45,43,47,55,48,52,50,51,59,49,44,47,56,61,59,60,64,58,56,62,57,63,60,62,65]
+}
+,{"from_order" : 3,
+"to_order" : 1,
+"value" : [4,5,0,6,7,8,4,9,1,10,2,11,1,12,3,2,11,13,0,4,5,10,2,14,6,5,8,15,16,17,18,4,19,6,7,20,9,21,22,10,11,23,5,21,24,10,14,23,25,12,26,11,13,27,15,5,17,28,29,30,20,6,18,15,16,31,19,32,33,7,34,20,22,35,36,11,23,37,21,5,24,38,28,29,26,36,39,11,27,37,40,18,41,42,15,31,43,20,44,16,45,31,34,32,20,46,47,48,49,50,40,42,15,51,52,43,41,42,45,31,44,20,43,47,48,53,54,55,49,42,56,51,57,58,52,42,59,45,55,58,60,42,56,59,2,0,3,12,1,4,13,5,6,12,17,7,3,6,8,12,4,7,11,0,9,15,12,1,10,23,13,14,12,17,18,10,11,14,15,12,26,23,24,14,16,17,19,18,26,14,15,16,23,26,24,25,28,34,18,26,19,27,20,33,30,26,35,28,34,47,27,26,33,30,40,35,29,18,21,27,20,37,37,27,29,30,40,44,21,41,22,20,36,37,32,29,45,48,30,44,43,37,39,40,51,44,36,41,37,46,38,42,53,31,32,48,30,52,57,43,45,48,51,44,39,37,43,38,42,58,54,55,53,48,49,52,59,56,57,48,50,51,55,56,60,48,49,50,15,17,29,1,3,4,0,17,5,1,2,4,17,15,29,32,18,30,14,0,10,1,6,2,23,14,15,1,6,3,5,33,13,2,4,9,20,15,31,18,30,41,10,11,25,6,2,7,14,15,23,16,24,28,12,13,35,2,8,9,16,15,28,20,40,31,34,11,12,2,7,8,19,14,26,16,24,37,37,16,19,20,40,45,26,42,27,24,36,37,22,19,46,47,20,45,44,37,39,40,50,45,36,42,37,57,38,43,52,21,22,47,20,51,56,44,46,47,50,45,39,37,44,38,43,58,53,54,52,47,48,51,59,55,56,47,49,50,54,55,60,47,48,49,2,3,26,4,27,40,17,0,5,12,1,26,5,3,6,1,2,26,0,11,7,12,1,21,28,26,41,27,40,44,26,12,17,18,29,42,7,0,11,16,9,24,12,11,21,18,29,25,41,26,28,29,51,42,20,17,43,48,18,42,0,15,8,16,9,30,11,13,22,16,24,35,18,11,25,14,23,47,57,41,43,48,51,42,53,19,20,48,18,52,8,31,10,9,30,36,15,13,32,16,30,35,13,11,22,45,14,23,58,56,57,48,50,51,54,55,53,48,49,52,39,31,33,30,36,37,32,46,34,30,35,38,55,56,60,48,49,50,33,34,59,30,37,38,0,1,2,10,3,4,5,1,11,10,4,18,6,7,12,10,14,8,9,0,6,10,3,8,11,29,13,10,18,19,12,27,13,10,14,19,29,32,28,15,18,19,32,22,27,14,15,19,28,32,29,38,33,30,27,20,34,22,35,32,22,21,36,14,15,16,37,31,32,58,38,33,20,21,39,22,35,36,34,31,47,35,37,32,14,21,16,24,41,17,39,20,21,26,45,42,23,21,40,43,24,41,20,25,44,26,45,51,21,23,40,26,42,48,44,52,46,45,51,55,25,23,49,26
,51,48,59,52,53,51,55,56,49,50,54,51,48,57,53,54,60,51,56,57,0,10,1,2,3,4,15,10,21,3,4,5,10,0,1,13,20,6,11,10,19,15,25,21,12,0,7,13,30,6,18,9,10,13,29,20,9,10,18,11,39,19,44,11,14,15,25,27,9,12,34,13,29,30,7,8,37,30,6,33,14,9,41,11,39,44,17,14,50,22,15,27,49,44,46,25,26,27,34,35,40,29,30,31,36,37,38,30,32,33,41,47,42,39,43,44,51,16,17,22,15,28,55,49,50,22,26,27,46,44,49,45,48,58,59,35,36,30,31,32,43,47,44,60,45,48,52,53,51,22,23,28,57,54,55,22,24,26,53,54,56,22,23,24,1,0,2,4,27,5,24,1,3,4,27,41,6,3,49,37,4,41,11,0,7,1,17,2,3,11,21,1,17,24,4,0,5,13,9,32,48,24,26,27,40,41,50,16,6,37,4,42,54,48,49,37,40,41,7,11,0,15,19,10,21,46,22,17,23,24,12,0,8,28,13,9,26,24,48,25,47,57,51,52,50,37,38,42,58,53,54,37,39,40,11,14,18,15,19,33,0,12,8,15,10,29,23,46,24,56,25,47,52,53,60,37,38,39,18,43,20,19,33,34,14,12,30,15,33,29,55,43,44,33,34,35,30,31,45,33,29,36,44,45,59,33,35,36,4,0,5,1,2,6,5,0,4,7,9,10,8,5,11,3,9,10,15,24,0,16,1,2,23,28,8,3,18,9,0,12,25,19,7,10,30,25,11,3,19,10,26,24,42,13,15,0,27,28,46,3,17,18,25,31,26,12,13,0,12,32,14,19,7,20,29,37,30,3,19,22,47,27,29,3,17,22,31,32,38,12,13,14,19,32,20,34,44,21,38,31,32,36,40,45,33,32,43,48,34,44,31,35,39,36,40,52,32,33,43,36,45,49,39,53,41,40,52,56,35,33,50,36,52,49,59,53,54,52,56,57,50,51,55,52,49,58,54,55,60,52,57,58]
+}
+,{"from_order" : 3,
+"to_order" : 0,
+"value" : [0,1,2,3,0,1,4,5,4,1,6,5,2,1,0,5,0,3,2,7,0,8,1,3,0,4,9,5,2,0,9,5,6,10,4,5,0,7,2,11,8,3,0,7,1,8,12,3,9,4,13,5,9,0,2,11,4,10,13,5,0,14,8,7,8,15,3,7,12,3,8,16,17,14,0,7,14,15,8,7,3,15,8,16,17,18,14,7,19,15,14,7,14,18,19,7,1,0,3,4,6,3,2,4,3,0,2,4,3,5,1,4,7,3,6,4,7,5,3,4,8,7,6,4,8,5,7,4,6,7,8,12,7,5,8,9,7,13,8,12,7,9,8,13,7,10,5,9,10,9,7,13,5,10,11,9,7,15,10,13,10,17,9,13,11,9,10,16,14,15,7,13,15,17,10,13,9,17,10,16,14,18,15,13,19,17,15,13,15,18,19,13,4,2,5,0,2,1,5,0,5,2,4,10,2,3,1,0,4,3,2,0,5,1,8,0,2,12,4,10,1,3,6,0,2,3,4,9,1,7,8,0,2,9,4,12,6,7,1,0,2,11,3,9,11,9,2,12,3,11,15,9,2,14,11,12,11,17,9,12,15,9,11,16,13,14,2,12,14,17,11,12,9,17,11,16,13,18,14,12,19,17,14,12,14,18,19,12,1,4,8,9,2,8,0,4,0,8,1,4,2,0,3,4,4,15,8,9,8,4,2,12,3,0,2,6,2,4,3,12,8,15,4,12,2,14,8,12,2,0,5,6,3,2,10,6,2,12,3,11,14,15,8,12,13,14,2,12,5,0,7,6,2,5,10,6,10,2,3,11,19,15,14,12,13,18,14,12,7,16,5,6,10,5,17,6,14,18,19,12,5,16,17,6,3,0,2,4,2,6,3,4,1,3,5,4,1,0,3,4,3,6,7,4,5,3,7,4,7,6,8,4,7,8,5,4,8,6,7,15,5,7,11,8,5,8,9,4,16,8,7,15,5,11,9,8,11,7,16,8,5,4,9,10,9,11,5,14,12,5,9,10,5,11,13,14,9,5,12,14,13,11,17,14,5,13,12,14,17,19,13,14,12,13,18,14,13,19,18,14,1,2,3,0,2,4,3,0,3,2,1,5,2,9,3,4,2,6,1,5,3,8,2,5,2,8,3,9,10,9,2,4,2,8,6,5,1,6,7,5,2,10,8,9,2,12,10,4,10,15,9,4,6,8,16,5,6,17,7,5,8,10,18,9,11,12,2,4,12,15,10,4,9,15,10,19,16,17,6,5,18,9,10,19,11,13,12,4,14,15,12,4,12,13,14,4,1,3,0,7,9,3,1,7,1,11,9,7,1,2,0,3,1,9,2,3,1,7,0,5,9,15,3,7,10,11,1,7,11,15,9,7,0,2,1,6,2,9,13,3,4,1,0,5,3,15,9,14,10,18,11,7,19,15,11,7,1,2,8,6,0,1,4,6,13,3,9,14,11,18,19,7,8,2,12,6,1,8,4,6,12,17,8,6,4,8,16,6,8,17,16,6,0,3,4,1,4,3,0,2,5,3,4,2,9,0,4,1,11,3,5,2,4,0,6,2,6,5,4,2,7,4,9,0,5,10,11,2,6,4,7,0,6,0,8,2,13,5,6,2,13,10,5,2,6,7,8,0,6,2,8,12,8,7,6,16,14,6,8,12,6,7,15,16,8,6,14,16,15,7,17,16,6,15,14,16,17,19,15,16,14,15,18,16,15,19,18,16]
+}
+,{"from_order" : 2,
+"to_order" : 3,
+"offset" : [0,1,3,5,7,8,9,10,11,11,13,15,16,17,19,20,21,22,23,23,25,27,28,30,32,33,33,35,36,37,39,41,42,43,44,44,45,47,49,50,51,52,53,55,56,57,58,58,60,61,62,64,65,66,66,68,70,71,73,75,77,79,80,80,82,83,84,86,88,90,91,93,95,96],
+"value" : [0,0,3,1,2,1,3,1,2,2,3,3,0,0,2,0,0,1,2,1,1,2,2,3,1,0,1,0,0,4,1,5,1,3,1,0,0,0,2,1,2,1,2,2,0,0,1,0,3,0,1,1,2,2,3,2,3,3,0,1,0,0,0,2,1,1,3,0,4,1,0,0,5,0,1,1,2,7,2,2,3,0,0,0,0,1,4,2,5,1,1,1,2,6,2,2]
+}
+,{"from_order" : 2,
+"to_order" : 2,
+"offset" : [0,7,18,28,38,46,52,57,64,64,73,82,87,92,101,107,112,117,122,122,132,141,148,158,168,175,175,185,190,195,204,214,223,229,234,234,239,248,257,262,268,273,281,290,295,300,305,305,314,319,325,335,340,348,348,358,369,376,388,399,410,422,429,429,438,444,449,459,469,480,488,497,507,514],
+"value" : [1,7,9,17,34,8,13,9,3,4,16,8,27,22,12,14,0,7,4,6,3,5,7,10,11,21,25,36,9,1,4,16,10,20,12,2,5,7,9,1,3,16,10,19,2,6,6,2,3,7,11,26,2,4,11,24,5,0,1,2,3,5,23,12,10,3,11,12,14,1,4,9,2,3,8,11,12,0,14,4,2,7,3,9,0,1,7,2,0,10,1,8,6,8,11,12,0,14,1,5,7,6,17,18,14,4,7,14,15,5,4,8,8,1,2,4,5,1,3,4,6,7,16,5,7,1,3,6,9,4,11,13,25,26,5,0,7,3,2,4,14,41,26,3,10,1,4,14,21,23,24,26,29,0,7,1,2,10,5,19,6,0,9,11,13,1,2,14,0,16,25,26,1,4,19,9,11,12,5,16,24,26,8,4,6,3,7,2,37,52,3,6,1,37,38,2,6,1,7,37,4,24,39,40,5,7,8,0,6,37,3,24,39,40,24,28,32,0,9,11,12,4,7,7,8,0,4,2,3,4,5,1,3,6,3,2,9,1,4,3,5,2,11,12,7,13,0,4,3,10,1,11,12,7,13,0,9,2,10,1,5,0,5,0,1,11,23,24,4,1,3,11,15,8,17,18,20,21,12,7,9,8,10,1,2,11,12,13,6,9,7,10,6,12,14,10,0,2,6,7,9,2,3,7,8,12,14,3,18,21,2,5,1,4,3,7,2,0,4,3,6,9,1,0,5,2,6,9,12,14,0,18,21,1,7,30,21,0,1,5,21,22,23,24,25,0,2,4,9,10,12,14,3,17,1,4,8,2,24,5,7,0,17,4,34,36,31,38,39,8,0,4,39,40,3,16,0,9,10,12,14,4,25,5,6,26,2,16,0,17,1,3,25,5,6,26,2,39,40,1,24,7,3,4,25,6,26,39,54,57,27,7,50,51,52,55,57,3,4,25,5,26,6,27,1,24,5,57,62,14,15,16,3,5,13,2,20,1,3,7,9,0,20,2,3,6,0,13,1,2,6,0,14,15,16,5,1,7,9,33,9,22,23,29,31,8,27,6,7,0,14,15,16,3,17,18,6,28,7,8,2,3,17,18,5,4,27,7,1,3,9,4,27,6,28,5,8,36,9,22,23,4,29,31,28,5,7,4,33,1,3,7,8,36]
+}
+,{"from_order" : 2,
+"to_order" : 1,
+"value" : [0,7,8,4,5,0,1,2,11,4,10,2,4,9,1,3,2,13,1,12,3,0,2,14,5,6,8,4,6,7,9,10,11,12,11,13,5,10,14,8,16,17,5,15,17,6,15,16,18,4,19,19,7,20,18,6,20,9,21,22,21,10,23,22,11,23,21,5,24,24,14,23,25,12,26,26,11,27,25,13,27,5,28,29,17,29,30,15,28,30,18,15,31,20,16,31,19,32,33,34,32,20,33,7,34,22,35,36,36,11,37,35,23,37,24,38,29,21,38,28,26,36,39,39,27,37,40,18,41,40,42,15,41,42,31,44,20,43,44,16,45,43,45,31,20,47,48,32,46,48,34,46,47,49,50,40,50,15,51,49,42,51,52,43,41,52,42,45,43,48,53,44,47,53,54,55,49,55,42,56,54,56,51,57,58,52,58,42,59,57,59,45,55,58,60,60,56,59,0,12,1,3,12,4,2,1,4,2,0,3,6,12,7,5,17,7,13,5,6,8,4,7,3,6,8,9,15,1,11,0,9,11,15,12,10,14,12,18,10,11,13,12,17,10,23,13,18,14,15,23,14,17,24,16,17,26,14,16,19,15,16,19,20,33,18,27,20,18,26,19,21,20,37,29,18,21,22,20,36,21,41,22,24,25,34,23,25,28,23,26,24,26,27,33,26,28,34,29,27,37,26,30,35,27,30,40,30,28,47,29,30,44,32,48,30,31,30,52,53,31,32,32,29,45,33,40,35,35,34,47,37,38,42,36,46,38,36,41,37,39,38,58,39,37,43,37,40,44,39,40,51,41,46,42,43,42,58,43,51,44,45,48,44,57,43,45,60,49,50,56,48,50,55,48,49,57,48,51,59,50,51,53,48,52,54,49,52,54,55,53,55,56,60,59,56,57,0,1,2,17,1,4,29,3,4,15,1,3,5,2,4,0,17,5,10,6,2,14,1,6,25,6,7,11,2,7,23,6,3,12,2,8,34,7,8,13,2,9,33,4,9,35,8,9,14,0,10,10,11,25,34,11,12,5,33,13,12,13,35,15,16,28,14,16,24,14,15,23,15,18,30,17,32,18,17,15,29,19,16,37,19,14,26,15,20,31,16,20,40,20,18,41,19,20,45,22,47,20,21,20,51,52,21,22,22,19,46,23,24,28,26,24,37,27,24,36,26,42,27,29,32,30,28,40,31,31,30,41,37,38,43,36,57,38,36,42,37,39,38,58,39,37,44,37,40,45,39,40,50,42,57,43,44,43,58,44,50,45,46,47,45,56,44,46,60,48,49,55,47,49,54,47,48,56,47,50,59,49,50,52,47,51,53,48,51,53,54,52,54,55,60,59,55,56,0,12,1,3,4,40,2,4,27,2,3,26,5,1,26,17,0,5,6,1,2,5,3,6,7,1,21,0,11,7,8,9,30,0,16,9,0,15,8,10,9,36,8,31,10,7,9,24,11,12,21,11,14,23,13,45,14,13,11,22,15,16,30,13,16,35,11,16,24,15,13,32,17,12,26,11,18,25,12,18,29,18,14,47,17,18,42,20,48,18,19,18,52,53,19,20,20,17,43,22,45,23,22,24,35,21,29,25,25,23,47,26,2
7,40,28,27,44,28,26,41,26,29,42,28,29,51,31,30,36,32,30,35,33,30,37,39,31,33,34,30,38,32,46,34,33,34,59,39,36,37,46,35,38,59,37,38,41,40,44,41,51,42,43,48,42,57,41,43,60,49,50,56,48,50,55,48,49,57,48,51,58,50,51,53,48,52,54,49,52,54,55,53,55,56,60,58,56,57,2,3,4,1,10,4,0,10,3,0,1,2,5,4,18,5,1,11,7,14,8,6,10,8,6,7,12,9,3,8,9,0,6,11,10,18,12,10,14,13,10,19,12,27,13,11,29,13,36,15,16,21,14,16,22,14,15,16,41,17,14,24,17,27,14,19,32,15,19,28,15,18,29,18,19,21,22,36,20,22,35,20,21,39,21,24,41,23,43,24,23,21,40,25,26,51,20,26,45,20,25,44,23,26,48,21,26,42,25,23,49,27,22,32,27,20,34,29,33,30,28,38,30,28,32,29,32,38,33,31,58,33,37,31,32,34,35,32,34,31,47,39,35,36,37,58,38,47,35,37,40,43,41,40,42,48,39,45,42,44,45,51,46,45,55,44,52,46,49,51,48,50,48,57,49,50,54,52,51,55,53,51,56,59,52,53,54,51,57,53,54,60,59,55,56,60,56,57,10,3,4,1,2,4,0,2,3,0,10,1,21,4,5,15,3,5,0,13,6,1,20,6,7,30,6,12,0,7,8,6,33,7,8,37,10,11,19,9,11,39,9,10,18,12,13,30,9,13,29,9,12,34,10,13,20,14,11,44,14,9,41,10,15,21,11,15,25,14,15,27,17,22,15,16,15,28,51,16,17,17,14,50,18,39,19,18,29,20,19,25,21,56,23,24,54,22,24,53,22,23,55,22,26,57,24,26,44,25,27,46,25,26,49,26,27,51,22,28,52,23,28,50,22,27,34,29,30,40,29,31,35,30,31,36,30,32,59,31,32,37,30,33,38,32,33,34,35,40,59,35,36,36,37,38,41,39,44,42,39,43,41,47,42,44,45,48,43,60,45,43,47,44,46,45,58,46,44,49,47,60,48,49,48,58,55,49,50,52,53,51,53,54,56,57,54,55,0,1,2,3,1,24,2,27,5,0,4,5,1,4,27,3,4,41,6,37,4,6,3,49,7,17,2,11,0,7,0,13,9,8,28,9,12,0,8,8,10,29,0,15,10,7,19,10,5,9,32,11,1,17,12,28,13,14,15,33,11,15,19,11,14,18,12,15,29,14,12,30,3,11,21,4,13,32,16,4,42,50,16,6,18,19,33,20,19,34,18,43,20,21,17,24,22,17,23,21,46,22,24,25,47,23,56,25,23,46,24,26,25,57,26,24,48,24,27,41,26,27,40,30,33,29,31,29,36,30,31,45,43,33,34,44,33,35,55,34,35,45,33,36,59,35,36,60,38,39,53,37,39,52,37,38,54,37,40,58,39,40,48,40,41,50,37,42,51,38,42,49,37,41,55,43,44,44,45,59,46,56,47,48,47,57,54,48,49,51,52,50,52,53,60,58,53,54,0,1,2,5,2,6,4,1,6,4,0,5,8,3,9,0,7,10,4,7,9,5,9,10,11,3,1
0,8,5,11,38,13,14,32,12,14,31,12,13,15,16,1,15,24,0,26,13,0,25,12,0,14,7,20,12,19,7,42,13,15,24,16,2,46,17,18,28,3,18,27,3,17,32,19,20,20,44,21,19,34,21,23,18,9,25,19,10,30,3,19,37,19,22,29,3,22,47,17,22,23,28,8,25,31,26,26,24,42,30,25,11,27,28,46,29,37,30,47,27,29,31,32,38,32,34,44,33,48,34,33,32,43,35,36,52,31,36,40,31,35,39,33,36,49,32,36,45,35,33,50,39,40,52,41,40,56,39,53,41,38,40,45,43,48,44,43,45,49,50,52,49,51,49,58,50,51,55,53,52,56,54,52,57,59,53,54,55,52,58,54,55,60,59,56,57,60,57,58]
+}
+,{"from_order" : 2,
+"to_order" : 0,
+"value" : [1,2,3,0,1,2,1,4,5,0,1,5,0,1,4,1,6,5,4,1,6,2,1,5,0,2,3,0,1,3,0,4,5,4,6,5,2,0,5,3,2,7,0,2,7,0,3,7,0,8,1,8,1,3,0,8,3,0,4,9,0,9,5,4,9,5,9,0,2,2,9,5,6,10,4,10,4,5,6,10,5,0,2,11,7,2,11,0,7,11,8,0,7,8,3,7,1,8,12,12,3,8,1,12,3,9,4,13,4,13,5,9,13,5,9,2,11,9,0,11,4,10,13,10,13,5,0,14,8,14,0,7,14,8,7,3,15,8,15,3,7,8,15,7,3,8,16,12,8,16,12,3,16,17,14,0,17,0,7,17,14,7,14,15,8,15,14,7,15,8,16,3,15,16,17,18,14,14,18,7,17,18,7,19,15,14,14,19,7,19,15,7,14,18,19,18,19,7,3,1,4,0,3,4,1,0,4,1,0,3,3,2,4,6,2,4,6,3,2,0,2,4,3,0,2,5,1,4,3,5,1,5,3,4,7,3,4,7,5,3,3,6,4,7,3,6,7,5,4,7,6,4,8,6,4,8,7,4,8,5,4,5,8,9,7,5,9,7,5,8,10,5,9,7,10,5,5,11,9,5,10,11,6,8,12,6,7,12,6,7,8,7,8,9,7,8,12,7,10,9,7,8,13,7,9,13,7,13,12,10,7,13,15,7,13,14,7,13,14,15,7,7,15,10,9,8,13,13,8,12,9,10,16,11,9,16,11,9,10,9,17,16,9,17,10,10,9,13,17,9,13,11,10,16,17,10,16,10,17,13,15,10,13,15,17,10,18,19,13,15,19,13,15,18,13,17,15,13,19,17,13,14,15,13,14,18,13,14,18,15,15,18,19,19,17,15,2,1,0,2,5,0,4,5,0,4,2,0,1,5,0,2,1,5,3,1,0,2,3,0,3,6,0,1,6,0,4,3,0,7,1,0,6,7,0,1,8,0,5,8,0,7,8,0,2,3,1,1,3,6,6,7,1,5,1,8,1,7,8,2,4,9,2,3,9,2,3,4,2,4,10,5,2,10,5,2,4,2,11,9,2,11,3,2,4,12,2,9,12,2,12,10,11,2,12,14,2,12,13,2,12,13,14,2,2,14,11,3,4,9,11,3,9,3,15,9,3,11,15,5,4,10,9,4,12,12,4,10,9,11,16,15,9,16,15,9,11,9,17,16,9,17,11,11,9,12,17,9,12,15,11,16,17,11,16,11,17,12,14,11,12,14,17,11,18,19,12,14,19,12,14,18,12,17,14,12,19,17,12,13,14,12,13,18,12,13,18,14,14,18,19,19,17,14,2,0,4,1,8,9,1,4,9,1,4,8,8,0,4,2,8,0,0,1,4,0,8,1,0,3,4,2,0,3,0,5,6,2,0,6,2,0,5,0,7,6,5,0,7,3,0,6,2,3,4,2,3,11,10,2,11,10,2,3,2,5,6,2,10,6,3,2,6,2,5,10,2,8,4,2,3,12,2,4,12,2,12,11,8,2,12,14,2,12,13,2,12,13,14,2,2,14,8,10,3,11,3,10,6,4,3,12,12,3,11,4,8,9,4,15,9,4,15,8,8,4,12,15,4,12,5,7,6,5,10,6,16,5,6,7,16,5,5,17,6,10,5,17,5,16,17,7,16,6,10,17,6,16,17,6,15,8,9,8,15,12,14,8,12,14,15,8,18,19,12,14,19,12,14,18,12,15,14,12,19,15,12,13,14,12,13,18,12,13,18,14,14,18,19,19,15,14,0,2,4,3,2,4,3,0,4,3,0,2,2,6,4,2,6,3,1,5,4,1,3,4,1,3,5,1,0,4,1,0,3,6,3,4,3,5,4,3,7,4,5,3,7,3,6,
7,8,9,4,5,9,4,5,8,4,4,9,10,5,4,10,7,5,4,7,8,4,6,8,4,7,6,4,5,9,8,5,11,8,5,11,9,5,9,10,12,5,10,12,5,9,5,13,14,5,11,14,5,11,13,5,12,14,9,5,14,5,13,12,5,7,8,5,7,11,6,7,15,8,6,15,8,6,7,8,7,15,16,7,15,16,8,7,7,11,8,11,7,16,11,9,8,16,8,15,11,16,8,12,9,10,9,12,14,9,11,14,11,13,14,11,17,14,13,11,17,13,12,14,12,18,14,12,13,18,13,17,14,19,13,14,17,19,13,13,18,14,13,19,18,17,19,14,19,18,14,2,3,0,1,3,0,1,2,0,1,2,3,4,3,0,2,4,0,2,1,5,3,1,5,6,1,5,2,6,1,1,7,5,1,6,7,2,3,9,2,8,9,2,8,3,2,6,5,2,8,5,2,8,6,3,2,5,2,10,9,2,10,8,2,3,4,2,9,4,10,2,4,12,2,4,11,2,4,11,12,2,2,12,10,8,3,9,3,8,5,9,3,4,13,14,4,12,14,4,12,13,4,15,12,4,14,15,4,10,9,4,15,9,4,10,15,4,11,12,4,11,13,4,12,10,4,8,6,5,8,16,5,6,16,5,17,6,5,16,17,5,6,7,5,17,7,5,6,8,16,16,17,6,6,17,7,10,8,9,8,18,9,8,10,18,9,10,19,18,9,19,18,9,10,9,15,19,9,15,10,18,10,19,15,10,19,12,15,10,11,13,12,12,13,14,14,15,12,1,0,3,1,9,3,3,0,7,1,0,7,1,3,7,9,1,7,11,1,7,1,11,9,2,0,3,1,2,0,1,0,5,4,0,5,4,1,0,0,4,6,0,1,6,0,2,6,7,0,5,1,2,3,4,1,5,1,8,6,1,2,6,1,2,8,1,4,6,1,8,4,1,9,2,1,7,5,10,1,7,10,11,1,2,8,6,2,12,6,8,2,12,9,2,3,2,13,3,2,9,13,3,9,14,13,3,14,13,3,9,3,15,14,3,15,9,9,3,7,15,3,7,8,4,6,4,16,6,4,8,16,8,12,6,17,8,6,12,17,6,8,16,6,17,16,6,18,19,7,11,19,7,11,18,7,15,11,7,19,15,7,9,15,7,10,11,7,10,18,7,11,9,7,12,17,8,8,17,16,13,9,14,15,9,14,11,15,9,10,18,11,11,18,19,19,15,11,0,4,1,3,4,1,0,3,1,0,3,4,3,5,2,4,0,2,3,0,2,4,3,2,5,4,2,5,3,4,7,8,0,6,8,0,6,7,0,9,0,1,9,0,4,4,7,0,6,4,0,0,8,2,6,0,2,7,9,0,9,4,1,10,11,2,5,11,2,5,10,2,6,8,2,2,8,12,6,2,12,11,3,2,4,6,2,5,6,2,13,6,2,13,5,2,13,10,2,11,3,5,6,4,7,7,4,9,6,5,4,5,10,11,13,5,6,13,10,5,6,7,8,6,8,12,14,6,12,14,6,8,6,15,16,6,7,16,6,7,15,6,14,16,8,6,16,6,15,14,7,15,16,7,17,16,15,7,17,8,7,16,14,8,12,8,14,16,15,14,16,14,18,16,14,15,18,15,17,16,19,15,16,17,19,15,15,18,16,15,19,18,17,19,16,19,18,16]
+}
+,{"from_order" : 1,
+"to_order" : 3,
+"offset" : [0,2,4,7,8,8,10,12,13,15,17,18,20,22,23,23,25,29,35,37,40,42,42,46,49,51,53,54,56,57,57,59,61,62,64,66,67,69,70,72,73,73,76,78,79,81,83,84,84,90,94,96,99,104,106,108,108,114,116,118,124,126,129,130,133,135,138,142,144],
+"value" : [0,3,1,2,1,2,3,2,3,0,3,0,0,0,2,0,2,1,1,2,1,2,2,3,1,3,0,4,1,3,7,1,11,5,9,0,4,0,1,5,1,5,3,10,6,1,3,1,2,0,2,0,2,0,1,2,2,0,3,0,1,0,0,3,0,1,1,2,3,2,2,3,3,0,2,4,0,2,0,0,1,0,1,1,3,11,16,9,0,5,3,4,0,1,3,0,4,1,2,0,5,1,7,2,0,5,7,2,3,9,0,7,5,1,3,0,3,0,8,4,11,12,6,2,0,1,0,1,2,0,10,5,1,4,2,4,1,2,5,1,6,2,6,2]
+}
+,{"from_order" : 1,
+"to_order" : 2,
+"offset" : [0,3,6,10,12,12,15,18,20,23,26,28,31,34,36,36,39,43,49,52,56,59,59,64,68,71,74,76,79,81,81,84,87,89,92,95,97,100,102,105,107,107,111,114,116,119,122,124,124,130,134,137,141,147,150,153,153,159,162,165,171,174,178,180,184,187,191,195,198],
+"value" : [0,1,7,2,4,6,2,3,5,7,5,6,0,10,3,9,0,2,2,3,1,3,8,1,2,7,5,6,4,6,8,4,5,7,7,8,0,16,5,0,7,1,3,6,0,9,4,11,13,2,3,10,1,2,4,14,4,5,19,0,9,11,12,5,8,0,4,6,2,3,6,1,3,7,1,2,4,5,7,6,7,2,3,10,1,3,5,0,3,0,2,9,0,1,4,4,5,7,8,10,6,8,6,7,9,9,10,2,3,6,9,1,3,7,1,2,0,2,5,0,1,4,4,5,0,9,10,12,14,3,0,17,1,4,8,0,2,1,24,5,7,3,4,25,5,6,26,2,3,16,6,27,7,0,14,15,16,3,5,0,13,2,0,20,1,22,23,4,29,31,8,2,3,6,1,3,7,9,1,2,17,18,5,6,4,33,9,4,27,6,7,28,5,7,8,8,36,9]
+}
+,{"from_order" : 1,
+"to_order" : 1,
+"offset" : [0,13,26,40,50,50,59,69,75,84,94,102,111,121,127,127,143,160,173,186,198,209,209,226,240,250,260,266,280,290,290,299,308,314,326,338,346,355,368,380,386,386,401,412,420,433,442,454,454,470,487,500,517,536,551,567,567,581,591,601,616,628,640,648,664,675,689,705,718],
+"value" : [4,7,1,2,3,19,33,5,8,24,29,14,17,4,0,7,2,3,19,33,9,11,12,22,26,36,4,0,7,1,3,19,33,10,11,14,23,13,27,37,4,0,7,1,2,19,33,12,13,25,10,11,12,13,3,6,9,1,2,0,9,2,14,15,12,16,17,4,7,0,9,1,3,4,8,2,4,8,10,11,12,0,13,6,2,3,8,14,15,12,16,17,1,7,23,24,25,17,13,6,7,8,10,11,12,0,13,3,5,7,8,5,6,8,14,15,12,16,17,1,4,2,3,4,5,6,7,14,15,16,17,18,1,19,20,21,22,10,2,11,5,12,13,14,15,16,17,18,0,19,20,21,22,6,2,3,4,7,8,9,0,10,11,5,12,13,1,6,3,4,7,8,9,15,23,28,29,30,31,1,6,2,4,7,8,9,17,29,32,5,33,1,6,2,3,7,8,9,0,10,2,11,12,13,17,29,32,4,33,11,12,13,14,15,16,17,18,19,20,7,1,8,9,10,5,6,0,7,8,9,10,5,6,12,21,2,26,27,28,29,3,4,6,12,1,21,26,27,28,29,2,4,6,26,40,17,5,41,42,43,2,3,6,27,40,44,3,26,40,17,41,42,43,0,7,1,8,9,10,6,0,7,1,8,9,10,5,2,3,4,1,10,11,6,12,13,2,3,9,0,10,11,6,12,13,2,4,5,0,3,9,1,4,5,0,2,9,10,4,14,15,16,17,18,19,8,1,2,5,10,3,14,15,16,17,18,19,8,1,2,4,28,29,30,11,18,7,8,9,0,1,10,11,12,13,6,8,9,20,21,22,23,24,25,26,27,14,12,6,7,9,10,3,4,14,15,16,17,18,19,6,7,8,0,2,3,1,2,6,7,8,9,10,11,3,12,13,14,15,16,17,0,2,6,7,8,10,18,19,4,20,21,0,1,6,7,8,3,4,5,9,10,11,0,12,13,14,15,16,17,2,4,5,10,18,19,1,20,21,2,3,5,22,23,24,15,25,21,26,27,28,2,3,4,11,1,12,13,14,15,3,4,16,6,7,2,8,9,10,5,11,0,12,13,14,15,3,4,16,6,17,2,23,24,25,26,27,0,7,8,9,10,5,1,17,23,24,25,26,27,11,0,1,12,13,14,15,4,16,6,46,24,47,21,48,41,49,11,0,1,12,13,14,15,3,16,6,37,38,39,27,5,32,40,41,42,0,7,2,8,9,10,37,38,39,4,27,32,40,41,42,52,53,37,54,50,49,11,0,1,12,13,14,15,3,4,16,12,13,14,15,1,4,7,24,2,25,26,5,10,11,12,13,14,15,0,4,7,16,2,6,24,0,25,26,5,10,11,16,1,6,27,28,8,29,30,11,17,18,19,7,20,21,9,10,22,12,13,14,15,0,1,7,5,6,23,8,9,4,6,23,8,9,24,0,2,25,26,10,11,4,5,23,8,9,16,1,2,12,13,14,15,0,1,4,3,17,18,19,20,21,9,10,22,4,5,6,23,9,27,28,3,29,30,11,4,5,6,23,8,3,17,18,19,7,20,21,10,22,24,0,2,25,26,5,11,3,17,18,19,7,20,21,9,22,27,28,3,8,29,30,24,0,2,25,26,5,10]
+}
+,{"from_order" : 1,
+"to_order" : 0,
+"value" : [1,2,1,4,1,5,1,6,0,1,0,2,0,3,1,3,2,3,0,4,0,5,4,5,4,6,6,5,2,5,0,7,3,7,2,7,0,8,8,1,3,8,9,0,4,9,9,5,9,2,6,10,10,4,10,5,0,11,2,11,7,11,8,7,12,8,1,12,12,3,9,13,4,13,13,5,9,11,10,13,14,0,14,8,14,7,15,8,3,15,15,7,12,16,3,16,8,16,17,14,17,0,17,7,15,14,15,16,17,18,14,18,18,7,19,15,14,19,19,7,18,19,3,1,1,4,1,0,0,3,0,4,6,2,3,2,2,4,0,2,5,1,7,3,5,3,3,4,3,6,7,4,5,4,8,4,6,4,7,5,5,8,5,9,10,5,5,11,6,7,6,8,6,12,7,8,7,9,7,12,7,10,7,13,14,7,15,7,8,9,8,12,8,13,11,9,9,10,9,16,9,17,9,13,11,10,10,16,17,10,10,13,15,10,11,16,13,12,15,13,18,13,19,13,17,13,14,13,14,15,14,18,15,18,15,19,17,15,17,16,19,17,18,19,2,1,2,0,1,0,4,0,5,0,1,5,3,0,6,0,7,0,8,0,3,1,1,6,7,1,1,8,2,3,2,4,2,9,5,2,2,10,2,11,2,12,13,2,14,2,3,4,3,9,3,6,11,3,3,15,4,9,5,4,4,10,4,12,5,10,5,8,6,7,7,8,15,9,9,11,9,16,9,17,9,12,12,10,15,11,11,16,17,11,11,12,14,11,14,12,18,12,19,12,17,12,13,12,13,14,13,18,14,18,14,19,17,14,15,16,17,16,19,17,18,19,2,0,0,4,1,4,1,8,1,9,8,0,0,1,0,3,0,5,0,6,0,7,2,3,2,4,10,2,2,11,2,5,2,6,2,8,2,12,13,2,14,2,3,4,10,3,3,11,3,6,3,12,4,8,4,9,4,15,4,12,5,6,5,7,5,10,16,5,5,17,10,6,7,6,16,6,17,6,7,16,8,9,15,8,8,12,14,8,15,9,10,11,10,17,12,11,14,12,18,12,19,12,15,12,13,12,13,14,13,18,14,18,14,19,15,14,19,15,16,17,18,19,3,0,3,2,0,2,0,4,2,4,2,6,1,3,1,5,1,4,1,0,3,4,6,3,3,5,3,7,5,4,8,4,9,4,4,10,6,4,7,4,5,11,5,9,5,8,12,5,5,10,5,13,5,14,5,7,8,6,6,7,6,15,16,7,8,7,7,15,7,11,11,8,9,8,16,8,8,15,11,9,12,9,9,10,9,14,12,10,11,13,11,14,11,17,11,16,12,14,13,12,12,18,13,14,13,17,19,13,13,18,17,14,19,14,18,14,16,15,17,19,19,18,1,2,1,3,1,0,2,0,3,0,4,0,1,5,6,1,1,7,2,8,2,3,2,9,2,6,2,5,2,10,2,4,11,2,12,2,8,3,3,9,3,5,3,4,12,4,13,4,14,4,9,4,15,4,10,4,11,4,8,5,6,5,16,5,17,5,7,5,8,6,6,16,17,6,6,7,17,7,8,9,8,16,10,8,8,18,18,9,9,10,9,19,9,15,18,10,10,19,15,10,12,10,11,12,11,13,12,13,12,14,15,12,13,14,14,15,15,19,16,17,18,19,1,0,1,3,0,3,1,9,1,7,0,7,11,1,2,0,4,0,0,5,0,6,1,2,4,1,1,5,1,8,1,6,10,1,2,3,2,8,2,6,2,12,9,2,2,13,13,3,3,9,3,14,3,15,3,7,4,5,4,6,8,4,4,16,7,5,8,6,12,6,17,6,16,6,11,7,18,7,19,7,15,7,9,7,10,7,8,12,17,8,8,16,13,9,9,14,1
5,9,11,9,10,11,10,18,11,18,11,19,15,11,12,17,13,14,15,14,19,15,17,16,18,19,0,4,0,1,4,1,5,2,0,3,3,4,3,1,0,2,3,5,3,2,4,2,5,4,6,0,7,0,8,0,9,0,9,1,10,2,11,2,6,2,8,2,2,12,13,2,11,3,9,4,6,4,4,7,5,10,5,11,13,5,5,6,6,7,6,8,14,6,6,12,6,15,6,16,13,6,7,8,7,15,7,16,7,17,7,9,14,8,8,12,8,16,10,11,13,10,14,12,14,16,15,14,14,18,15,16,15,17,19,15,15,18,17,16,19,16,18,16,17,19,19,18]
+}
+,{"from_order" : 0,
+"to_order" : 3,
+"offset" : [0,0,2,4,6,6,14,20,20,26,28,28,30,32,34,34,36,40,40,46,58,58,66,68,78,82,90,96],
+"value" : [0,2,3,0,1,2,3,0,7,4,1,11,5,9,3,7,1,11,5,9,3,10,15,6,1,2,0,2,0,3,2,3,0,1,0,1,0,2,4,9,3,11,16,9,0,5,3,11,15,16,9,20,4,0,5,1,7,2,13,3,9,0,10,7,5,1,3,0,8,10,14,4,5,1,11,12,6,2,0,4,1,2,3,9,0,7,5,1,6,2,8,4,11,12,6,2]
+}
+ ],
+"attrs" : {
+ "x" : [
+0,0,0,1,0,0,2,0,0,0,1,0,1,1,0,2,1,0,0,2,0,1,2,0,0,0,1,1,0,1,2,0,1,0,1,1,1,1,1,2,1,1,0,2,1,1,2,1,0,1,3,1,1,3,0,2,3,1,2,3 ]}
+}
\ No newline at end of file
diff --git a/tests/_python_orig/examples/__init__.py b/tests/_python_orig/examples/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/tests/_python_orig/examples/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/_python_orig/examples/algorithm/test_laplace.py b/tests/_python_orig/examples/algorithm/test_laplace.py
new file mode 100644
index 000000000..e957b9ea8
--- /dev/null
+++ b/tests/_python_orig/examples/algorithm/test_laplace.py
@@ -0,0 +1,10 @@
+def test_laplace():
+ from taichi.examples.algorithm.laplace import laplace, x, y
+
+ for i in range(10):
+ x[i, i + 1] = 1.0
+
+ laplace()
+
+ for i in range(10):
+ assert y[i, i + 1] == (4.0 if i % 3 == 1 else 0.0)
diff --git a/tests/_python_orig/examples/autodiff/__init__.py b/tests/_python_orig/examples/autodiff/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/tests/_python_orig/examples/autodiff/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/_python_orig/examples/autodiff/test_minimization.py b/tests/_python_orig/examples/autodiff/test_minimization.py
new file mode 100644
index 000000000..90660639e
--- /dev/null
+++ b/tests/_python_orig/examples/autodiff/test_minimization.py
@@ -0,0 +1,21 @@
+import random
+
+import taichi as ti
+from tests import test_utils
+
+
+def test_minimization():
+ from taichi.examples.autodiff.minimization import (L, gradient_descent, n,
+ reduce, x, y)
+
+ for i in range(n):
+ x[i] = random.random()
+ y[i] = random.random()
+
+ for k in range(100):
+ with ti.Tape(loss=L):
+ reduce()
+ gradient_descent()
+
+ for i in range(n):
+ assert x[i] == test_utils.approx(y[i], rel=1e-2)
diff --git a/tests/_python_orig/examples/rendering/test_cornell_box.py b/tests/_python_orig/examples/rendering/test_cornell_box.py
new file mode 100644
index 000000000..152d7f2a7
--- /dev/null
+++ b/tests/_python_orig/examples/rendering/test_cornell_box.py
@@ -0,0 +1,43 @@
+import argparse
+
+import taichi as ti
+
+FRAMES = 200
+
+
+def test_cornell_box():
+ from taichi.examples.rendering.cornell_box import render, tonemap
+ for i in range(FRAMES):
+ render()
+ interval = 10
+ if i % interval == 0:
+ tonemap(i)
+
+
+def video_cornell_box(result_dir):
+ from taichi.examples.rendering.cornell_box import (render, tonemap,
+ tonemapped_buffer)
+ video_manager = ti.VideoManager(output_dir=result_dir,
+ framerate=24,
+ automatic_build=False)
+ gui = ti.GUI("Taichi Cornell Box",
+ res=800,
+ background_color=0x112F41,
+ show_gui=False)
+ for i in range(FRAMES):
+ render()
+ interval = 10
+ if i % interval == 0:
+ tonemap(i)
+
+ gui.set_image(tonemapped_buffer)
+ video_manager.write_frame(gui.get_image())
+ gui.clear()
+ video_manager.make_video(mp4=True, gif=False)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Generate cornell_box video')
+ parser.add_argument('output_directory',
+ help='output directory of generated video')
+ video_cornell_box(parser.parse_args().output_directory)
diff --git a/tests/_python_orig/examples/rendering/test_taichi_logo.py b/tests/_python_orig/examples/rendering/test_taichi_logo.py
new file mode 100644
index 000000000..2a8e85395
--- /dev/null
+++ b/tests/_python_orig/examples/rendering/test_taichi_logo.py
@@ -0,0 +1,32 @@
+import argparse
+
+import taichi as ti
+
+FRAMES = 100
+
+
+def test_taichi_logo():
+ from taichi.examples.rendering.taichi_logo import paint
+ paint()
+
+
+def video_taichi_logo(result_dir):
+ from taichi.examples.rendering.taichi_logo import n, paint, x
+ video_manager = ti.VideoManager(output_dir=result_dir,
+ framerate=24,
+ automatic_build=False)
+ paint()
+ gui = ti.GUI('Logo', (n, n), show_gui=False)
+ for i in range(FRAMES):
+ gui.set_image(x)
+ video_manager.write_frame(gui.get_image())
+ gui.clear()
+
+ video_manager.make_video(mp4=True, gif=False)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Generate taichi_logo video')
+ parser.add_argument('output_directory',
+ help='output directory of generated video')
+ video_taichi_logo(parser.parse_args().output_directory)
diff --git a/tests/_python_orig/examples/simulation/test_mpm99.py b/tests/_python_orig/examples/simulation/test_mpm99.py
new file mode 100644
index 000000000..297fe0236
--- /dev/null
+++ b/tests/_python_orig/examples/simulation/test_mpm99.py
@@ -0,0 +1,45 @@
+import argparse
+
+import taichi as ti
+
+FRAMES = 100
+
+
+def test_mpm99():
+ from taichi.examples.simulation.mpm99 import dt, initialize, substep
+
+ initialize()
+ for i in range(FRAMES):
+ for s in range(int(2e-3 // dt)):
+ substep()
+
+
+def video_mpm99(result_dir):
+ from taichi.examples.simulation.mpm99 import (dt, initialize, material,
+ substep, x)
+
+ video_manager = ti.VideoManager(output_dir=result_dir,
+ framerate=24,
+ automatic_build=False)
+ initialize()
+ gui = ti.GUI("Taichi MLS-MPM-99",
+ res=512,
+ background_color=0x112F41,
+ show_gui=False)
+ for i in range(FRAMES):
+ for s in range(int(2e-3 // dt)):
+ substep()
+ gui.circles(x.to_numpy(),
+ radius=1.5,
+ palette=[0x068587, 0xED553B, 0xEEEEF0],
+ palette_indices=material)
+ video_manager.write_frame(gui.get_image())
+ gui.clear()
+ video_manager.make_video(mp4=True, gif=False)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Generate mpm99 video')
+ parser.add_argument('output_directory',
+ help='output directory of generated video')
+ video_mpm99(parser.parse_args().output_directory)
diff --git a/tests/_python_orig/fuse_test_template.py b/tests/_python_orig/fuse_test_template.py
new file mode 100644
index 000000000..db808509b
--- /dev/null
+++ b/tests/_python_orig/fuse_test_template.py
@@ -0,0 +1,91 @@
+import time
+
+import taichi as ti
+
+
+def template_fuse_dense_x2y2z(
+ size=1024**3,
+ repeat=10,
+ first_n=100,
+):
+ x = ti.field(ti.i32, shape=(size, ))
+ y = ti.field(ti.i32, shape=(size, ))
+ z = ti.field(ti.i32, shape=(size, ))
+ first_n = min(first_n, size)
+
+ @ti.kernel
+ def x_to_y():
+ for i in x:
+ y[i] = x[i] + 1
+
+ @ti.kernel
+ def y_to_z():
+ for i in x:
+ z[i] = y[i] + 4
+
+ def x_to_y_to_z():
+ x_to_y()
+ y_to_z()
+
+ for i in range(first_n):
+ x[i] = i * 10
+
+ # Simply test
+ for _ in range(repeat):
+ t = time.time()
+ x_to_y()
+ ti.sync()
+ print('x_to_y', time.time() - t)
+
+ for _ in range(repeat):
+ t = time.time()
+ y_to_z()
+ ti.sync()
+ print('y_to_z', time.time() - t)
+
+ for _ in range(repeat):
+ t = time.time()
+ x_to_y_to_z()
+ ti.sync()
+ print('fused x->y->z', time.time() - t)
+
+ for i in range(first_n):
+ assert x[i] == i * 10
+ assert y[i] == x[i] + 1
+ assert z[i] == x[i] + 5
+
+
+def template_fuse_reduction(size=1024**3, repeat=10, first_n=100):
+ x = ti.field(ti.i32, shape=(size, ))
+ first_n = min(first_n, size)
+
+ @ti.kernel
+ def reset():
+ for i in range(first_n):
+ x[i] = i * 10
+
+ @ti.kernel
+ def inc():
+ for i in x:
+ x[i] = x[i] + 1
+
+ # Simply test
+ reset()
+ ti.sync()
+ for _ in range(repeat):
+ t = time.time()
+ inc()
+ ti.sync()
+ print('single inc', time.time() - t)
+
+ reset()
+ ti.sync()
+ t = time.time()
+ for _ in range(repeat):
+ inc()
+ ti.sync()
+ duration = time.time() - t
+ print(f'fused {repeat} inc: total={duration} average={duration / repeat}')
+
+ for i in range(first_n):
+ assert x[i] == i * 10 + repeat
diff --git a/tests/_python_orig/test_abs.py b/tests/_python_orig/test_abs.py
new file mode 100644
index 000000000..a2ae2a6ba
--- /dev/null
+++ b/tests/_python_orig/test_abs.py
@@ -0,0 +1,37 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_abs():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ N = 16
+
+ ti.root.dense(ti.i, N).place(x)
+ ti.root.dense(ti.i, N).place(y)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ x[i] = abs(y[i])
+
+ for i in range(N):
+ y[i] = i - 10
+ x.grad[i] = 1
+
+ func()
+ func.grad()
+
+ def sgn(x):
+ if x > 0:
+ return 1
+ if x < 0:
+ return -1
+ return 0
+
+ for i in range(N):
+ assert x[i] == abs(y[i])
+ assert y.grad[i] == sgn(y[i])
diff --git a/tests/_python_orig/test_ad_atomic.py b/tests/_python_orig/test_ad_atomic.py
new file mode 100644
index 000000000..fa317a79e
--- /dev/null
+++ b/tests/_python_orig/test_ad_atomic.py
@@ -0,0 +1,28 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_ad_reduce():
+ N = 16
+
+ x = ti.field(dtype=ti.f32, shape=N, needs_grad=True)
+ loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ loss[None] += x[i]**2
+
+ total_loss = 0
+ for i in range(N):
+ x[i] = i
+ total_loss += i * i
+
+ loss.grad[None] = 1
+ func()
+ func.grad()
+
+ assert total_loss == test_utils.approx(loss[None])
+ for i in range(N):
+ assert x.grad[i] == test_utils.approx(i * 2)
diff --git a/tests/_python_orig/test_ad_basics.py b/tests/_python_orig/test_ad_basics.py
new file mode 100644
index 000000000..a870e643a
--- /dev/null
+++ b/tests/_python_orig/test_ad_basics.py
@@ -0,0 +1,353 @@
+import functools
+
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+has_autograd = False
+
+try:
+ import autograd.numpy as np
+ from autograd import grad
+ has_autograd = True
+except:
+ pass
+
+
+def if_has_autograd(func):
+ # functools.wraps is nececssary for pytest parametrization to work
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ if has_autograd:
+ func(*args, **kwargs)
+
+ return wrapper
+
+
+# Note: test happens at v = 0.2
+def grad_test(tifunc, npfunc=None):
+ npfunc = npfunc or tifunc
+
+ print(
+ f'arch={ti.lang.impl.current_cfg().arch} default_fp={ti.lang.impl.current_cfg().default_fp}'
+ )
+ x = ti.field(ti.lang.impl.current_cfg().default_fp)
+ y = ti.field(ti.lang.impl.current_cfg().default_fp)
+
+ ti.root.dense(ti.i, 1).place(x, x.grad, y, y.grad)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ y[i] = tifunc(x[i])
+
+ v = 0.234
+
+ y.grad[0] = 1
+ x[0] = v
+ func()
+ func.grad()
+
+ assert y[0] == test_utils.approx(npfunc(v), rel=1e-4)
+ assert x.grad[0] == test_utils.approx(grad(npfunc)(v), rel=1e-4)
+
+
+@if_has_autograd
+@test_utils.test()
+def test_size1():
+ x = ti.field(ti.i32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ x[0] = 1
+ assert x[0] == 1
+
+
+@pytest.mark.parametrize('tifunc', [
+ lambda x: x,
+ lambda x: -x,
+ lambda x: x * x,
+ lambda x: x**2,
+ lambda x: x * x * x,
+ lambda x: x * x * x * x,
+ lambda x: 0.4 * x * x - 3,
+ lambda x: (x - 3) * (x - 1),
+ lambda x: (x - 3) * (x - 1) + x * x,
+])
+@if_has_autograd
+@test_utils.test()
+def test_poly(tifunc):
+ grad_test(tifunc)
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: ti.tanh(x), lambda x: np.tanh(x)),
+ (lambda x: ti.sin(x), lambda x: np.sin(x)),
+ (lambda x: ti.cos(x), lambda x: np.cos(x)),
+ (lambda x: ti.acos(x), lambda x: np.arccos(x)),
+ (lambda x: ti.asin(x), lambda x: np.arcsin(x)),
+])
+@if_has_autograd
+@test_utils.test(exclude=[ti.vulkan])
+def test_trigonometric(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@pytest.mark.parametrize('tifunc', [
+ lambda x: 1 / x,
+ lambda x: (x + 1) / (x - 1),
+ lambda x: (x + 1) * (x + 2) / ((x - 1) * (x + 3)),
+])
+@if_has_autograd
+@test_utils.test()
+def test_frac(tifunc):
+ grad_test(tifunc)
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: ti.sqrt(x), lambda x: np.sqrt(x)),
+ (lambda x: ti.exp(x), lambda x: np.exp(x)),
+ (lambda x: ti.log(x), lambda x: np.log(x)),
+])
+@if_has_autograd
+@test_utils.test()
+def test_unary(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: ti.min(x, 0), lambda x: np.minimum(x, 0)),
+ (lambda x: ti.min(x, 1), lambda x: np.minimum(x, 1)),
+ (lambda x: ti.min(0, x), lambda x: np.minimum(0, x)),
+ (lambda x: ti.min(1, x), lambda x: np.minimum(1, x)),
+ (lambda x: ti.max(x, 0), lambda x: np.maximum(x, 0)),
+ (lambda x: ti.max(x, 1), lambda x: np.maximum(x, 1)),
+ (lambda x: ti.max(0, x), lambda x: np.maximum(0, x)),
+ (lambda x: ti.max(1, x), lambda x: np.maximum(1, x)),
+])
+@if_has_autograd
+@test_utils.test()
+def test_minmax(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@if_has_autograd
+@test_utils.test()
+def test_mod():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ ti.root.dense(ti.i, 1).place(x, y)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ y[0] = x[0] % 3
+
+ @ti.kernel
+ def func2():
+ ti.atomic_add(y[0], x[0] % 3)
+
+ func()
+ func.grad()
+
+ func2()
+ func2.grad()
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: ti.atan2(0.4, x), lambda x: np.arctan2(0.4, x)),
+ (lambda y: ti.atan2(y, 0.4), lambda y: np.arctan2(y, 0.4)),
+])
+@if_has_autograd
+@test_utils.test()
+def test_atan2(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: ti.atan2(0.4, x), lambda x: np.arctan2(0.4, x)),
+ (lambda y: ti.atan2(y, 0.4), lambda y: np.arctan2(y, 0.4)),
+])
+@if_has_autograd
+@test_utils.test(require=ti.extension.data64, default_fp=ti.f64)
+def test_atan2_f64(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: 0.4**x, lambda x: np.power(0.4, x)),
+ (lambda y: y**0.4, lambda y: np.power(y, 0.4)),
+])
+@if_has_autograd
+@test_utils.test()
+def test_pow(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@pytest.mark.parametrize('tifunc,npfunc', [
+ (lambda x: 0.4**x, lambda x: np.power(0.4, x)),
+ (lambda y: y**0.4, lambda y: np.power(y, 0.4)),
+])
+@if_has_autograd
+@test_utils.test(require=ti.extension.data64, default_fp=ti.f64)
+def test_pow_f64(tifunc, npfunc):
+ grad_test(tifunc, npfunc)
+
+
+@test_utils.test()
+def test_obey_kernel_simplicity():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x, y)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in x:
+ # OK: nested for loop
+ for j in ti.static(range(3)):
+ # OK: a series of non-for-loop statements
+ y[i] += x[i] * 42
+ y[i] -= x[i] * 5
+
+ y.grad[0] = 1.0
+ x[0] = 0.1
+
+ func()
+ func.grad()
+ assert x.grad[0] == test_utils.approx((42 - 5) * 3)
+
+
+@test_utils.test()
+def test_violate_kernel_simplicity1():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x, y)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in x:
+ y[i] = x[i] * 42
+ for j in ti.static(range(3)):
+ y[i] += x[i]
+
+ func()
+ func.grad()
+
+
+@test_utils.test()
+def test_violate_kernel_simplicity2():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x, y)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in x:
+ for j in ti.static(range(3)):
+ y[i] += x[i]
+ y[i] += x[i] * 42
+
+ func()
+ func.grad()
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_cast():
+ @ti.kernel
+ def func():
+ print(ti.cast(ti.cast(ti.cast(1.0, ti.f64), ti.f32), ti.f64))
+
+ func()
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_ad_precision_1():
+ loss = ti.field(ti.f32, shape=())
+ x = ti.field(ti.f64, shape=())
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ loss[None] = x[None]
+
+ loss.grad[None] = 1
+ func.grad()
+
+ assert x.grad[None] == 1
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_ad_precision_2():
+ loss = ti.field(ti.f64, shape=())
+ x = ti.field(ti.f32, shape=())
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ loss[None] = x[None]
+
+ with ti.Tape(loss):
+ func()
+
+ assert x.grad[None] == 1
+
+
+@test_utils.test()
+def test_ad_rand():
+ loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+ x = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def work():
+ loss[None] = x[None] * ti.random()
+
+ x[None] = 10
+ with pytest.raises(RuntimeError) as e:
+ with ti.Tape(loss):
+ work()
+ assert 'RandStmt not supported' in e.value.args[0]
+
+
+@test_utils.test(exclude=[ti.cc, ti.vulkan, ti.opengl])
+def test_ad_frac():
+ @ti.func
+ def frac(x):
+ fractional = x - ti.floor(x) if x > 0. else x - ti.ceil(x)
+ return fractional
+
+ @ti.kernel
+ def ti_frac(input_field: ti.template(), output_field: ti.template()):
+ for i in input_field:
+ output_field[i] = frac(input_field[i])**2
+
+ @ti.kernel
+ def calc_loss(input_field: ti.template(), loss: ti.template()):
+ for i in input_field:
+ loss[None] += input_field[i]
+
+ n = 10
+ field0 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True)
+ randoms = np.random.randn(10).astype(np.float32)
+ field0.from_numpy(randoms)
+ field1 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True)
+ loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+
+ with ti.Tape(loss):
+ ti_frac(field0, field1)
+ calc_loss(field1, loss)
+
+ grads = field0.grad.to_numpy()
+ expected = np.modf(randoms)[0] * 2
+ for i in range(n):
+ assert grads[i] == test_utils.approx(expected[i], rel=1e-4)
diff --git a/tests/_python_orig/test_ad_demote_dense.py b/tests/_python_orig/test_ad_demote_dense.py
new file mode 100644
index 000000000..425345385
--- /dev/null
+++ b/tests/_python_orig/test_ad_demote_dense.py
@@ -0,0 +1,14 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.metal, ti.opengl])
+def test_ad_demote_dense():
+ a = ti.field(ti.f32, shape=(7, 3, 19))
+
+ @ti.kernel
+ def inc():
+ for i, j, k in a:
+ a[i, j, k] += 1
+
+ inc.grad()
diff --git a/tests/_python_orig/test_ad_for.py b/tests/_python_orig/test_ad_for.py
new file mode 100644
index 000000000..b0b3cb6d3
--- /dev/null
+++ b/tests/_python_orig/test_ad_for.py
@@ -0,0 +1,777 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_sum():
+ N = 10
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.i32, shape=N)
+ p = ti.field(ti.f32, shape=N, needs_grad=True)
+
+ @ti.kernel
+ def compute_sum():
+ for i in range(N):
+ ret = 1.0
+ for j in range(b[i]):
+ ret = ret + a[i]
+ p[i] = ret
+
+ for i in range(N):
+ a[i] = 3
+ b[i] = i
+
+ compute_sum()
+
+ for i in range(N):
+ assert p[i] == 3 * b[i] + 1
+ p.grad[i] = 1
+
+ compute_sum.grad()
+
+ for i in range(N):
+ assert a.grad[i] == b[i]
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_sum_local_atomic():
+ N = 10
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.i32, shape=N)
+ p = ti.field(ti.f32, shape=N, needs_grad=True)
+
+ @ti.kernel
+ def compute_sum():
+ for i in range(N):
+ ret = 1.0
+ for j in range(b[i]):
+ ret += a[i]
+ p[i] = ret
+
+ for i in range(N):
+ a[i] = 3
+ b[i] = i
+
+ compute_sum()
+
+ for i in range(N):
+ assert p[i] == 3 * b[i] + 1
+ p.grad[i] = 1
+
+ compute_sum.grad()
+
+ for i in range(N):
+ assert a.grad[i] == b[i]
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_power():
+ N = 10
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.i32, shape=N)
+ p = ti.field(ti.f32, shape=N, needs_grad=True)
+
+ @ti.kernel
+ def power():
+ for i in range(N):
+ ret = 1.0
+ for j in range(b[i]):
+ ret = ret * a[i]
+ p[i] = ret
+
+ for i in range(N):
+ a[i] = 3
+ b[i] = i
+
+ power()
+
+ for i in range(N):
+ assert p[i] == 3**b[i]
+ p.grad[i] = 1
+
+ power.grad()
+
+ for i in range(N):
+ assert a.grad[i] == b[i] * 3**(b[i] - 1)
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_fibonacci():
+ N = 15
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=N, needs_grad=True)
+ c = ti.field(ti.i32, shape=N)
+ f = ti.field(ti.f32, shape=N, needs_grad=True)
+
+ @ti.kernel
+ def fib():
+ for i in range(N):
+ p = a[i]
+ q = b[i]
+ for j in range(c[i]):
+ p, q = q, p + q
+ f[i] = q
+
+ b.fill(1)
+
+ for i in range(N):
+ c[i] = i
+
+ fib()
+
+ for i in range(N):
+ f.grad[i] = 1
+
+ fib.grad()
+
+ for i in range(N):
+ print(a.grad[i], b.grad[i])
+ if i == 0:
+ assert a.grad[i] == 0
+ else:
+ assert a.grad[i] == f[i - 1]
+ assert b.grad[i] == f[i]
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_fibonacci_index():
+ N = 5
+ M = 10
+ a = ti.field(ti.f32, shape=M, needs_grad=True)
+ b = ti.field(ti.f32, shape=M, needs_grad=True)
+ f = ti.field(ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def fib():
+ for i in range(N):
+ p = 0
+ q = 1
+ for j in range(5):
+ p, q = q, p + q
+ b[q] += a[q]
+
+ for i in range(M):
+ f[None] += b[i]
+
+ f.grad[None] = 1
+ a.fill(1)
+
+ fib()
+ fib.grad()
+
+ for i in range(M):
+ is_fib = int(i in [1, 2, 3, 5, 8])
+ assert a.grad[i] == is_fib * N
+ assert b[i] == is_fib * N
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_global_ptr():
+ N = 5
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=N, needs_grad=True)
+ f = ti.field(ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def task():
+ for i in range(N):
+ p = 0
+ for j in range(N):
+ b[i] += a[p]**2
+ p += 1
+
+ for i in range(N):
+ f[None] += b[i]
+
+ f.grad[None] = 1
+ for i in range(N):
+ a[i] = i
+
+ task()
+ task.grad()
+
+ for i in range(N):
+ print(a.grad[i])
+ assert a.grad[i] == 2 * i * N
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_integer_stack():
+ N = 5
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=N, needs_grad=True)
+ c = ti.field(ti.i32, shape=N)
+ f = ti.field(ti.f32, shape=N, needs_grad=True)
+
+ @ti.kernel
+ def int_stack():
+ for i in range(N):
+ weight = 1
+ s = 0.0
+ for j in range(c[i]):
+ s += weight * a[i] + b[i]
+ weight *= 10
+ f[i] = s
+
+ a.fill(1)
+ b.fill(1)
+
+ for i in range(N):
+ c[i] = i
+
+ int_stack()
+
+ for i in range(N):
+ print(f[i])
+ f.grad[i] = 1
+
+ int_stack.grad()
+
+ t = 0
+ for i in range(N):
+ assert a.grad[i] == t
+ assert b.grad[i] == i
+ t = t * 10 + 1
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_double_for_loops():
+ N = 5
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=N, needs_grad=True)
+ c = ti.field(ti.i32, shape=N)
+ f = ti.field(ti.f32, shape=N, needs_grad=True)
+
+ @ti.kernel
+ def double_for():
+ for i in range(N):
+ weight = 1.0
+ for j in range(c[i]):
+ weight *= a[i]
+ s = 0.0
+ for j in range(c[i] * 2):
+ s += weight + b[i]
+ f[i] = s
+
+ a.fill(2)
+ b.fill(1)
+
+ for i in range(N):
+ c[i] = i
+
+ double_for()
+
+ for i in range(N):
+ assert f[i] == 2 * i * (1 + 2**i)
+ f.grad[i] = 1
+
+ double_for.grad()
+
+ for i in range(N):
+ assert a.grad[i] == 2 * i * i * 2**(i - 1)
+ assert b.grad[i] == 2 * i
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_double_for_loops_more_nests():
+ N = 6
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=N, needs_grad=True)
+ c = ti.field(ti.i32, shape=(N, N // 2))
+ f = ti.field(ti.f32, shape=(N, N // 2), needs_grad=True)
+
+ @ti.kernel
+ def double_for():
+ for i in range(N):
+ for k in range(N // 2):
+ weight = 1.0
+ for j in range(c[i, k]):
+ weight *= a[i]
+ s = 0.0
+ for j in range(c[i, k] * 2):
+ s += weight + b[i]
+ f[i, k] = s
+
+ a.fill(2)
+ b.fill(1)
+
+ for i in range(N):
+ for k in range(N // 2):
+ c[i, k] = i + k
+
+ double_for()
+
+ for i in range(N):
+ for k in range(N // 2):
+ assert f[i, k] == 2 * (i + k) * (1 + 2**(i + k))
+ f.grad[i, k] = 1
+
+ double_for.grad()
+
+ for i in range(N):
+ total_grad_a = 0
+ total_grad_b = 0
+ for k in range(N // 2):
+ total_grad_a += 2 * (i + k)**2 * 2**(i + k - 1)
+ total_grad_b += 2 * (i + k)
+ assert a.grad[i] == total_grad_a
+ assert b.grad[i] == total_grad_b
+
+
+@test_utils.test(require=[ti.extension.adstack, ti.extension.data64])
+def test_complex_body():
+ N = 5
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=N, needs_grad=True)
+ c = ti.field(ti.i32, shape=N)
+ f = ti.field(ti.f32, shape=N, needs_grad=True)
+ g = ti.field(ti.f32, shape=N, needs_grad=False)
+
+ @ti.kernel
+ def complex():
+ for i in range(N):
+ weight = 2.0
+ tot = 0.0
+ tot_weight = 0.0
+ for j in range(c[i]):
+ tot_weight += weight + 1
+ tot += (weight + 1) * a[i]
+ weight = weight + 1
+ weight = weight * 4
+ weight = ti.cast(weight, ti.f64)
+ weight = ti.cast(weight, ti.f32)
+
+ g[i] = tot_weight
+ f[i] = tot
+
+ a.fill(2)
+ b.fill(1)
+
+ for i in range(N):
+ c[i] = i
+ f.grad[i] = 1
+
+ complex()
+ complex.grad()
+
+ for i in range(N):
+ assert a.grad[i] == g[i]
+
+
+@test_utils.test(require=[ti.extension.adstack, ti.extension.bls])
+def test_triple_for_loops_bls():
+ N = 8
+ M = 3
+ a = ti.field(ti.f32, shape=N, needs_grad=True)
+ b = ti.field(ti.f32, shape=2 * N, needs_grad=True)
+ f = ti.field(ti.f32, shape=(N - M, N), needs_grad=True)
+
+ @ti.kernel
+ def triple_for():
+ ti.block_local(a)
+ ti.block_local(b)
+ for i in range(N - M):
+ for k in range(N):
+ weight = 1.0
+ for j in range(M):
+ weight *= a[i + j]
+ s = 0.0
+ for j in range(2 * M):
+ s += weight + b[2 * i + j]
+ f[i, k] = s
+
+ a.fill(2)
+
+ for i in range(2 * N):
+ b[i] = i
+
+ triple_for()
+
+ for i in range(N - M):
+ for k in range(N):
+ assert f[i, k] == 2 * M * 2**M + (4 * i + 2 * M - 1) * M
+ f.grad[i, k] = 1
+
+ triple_for.grad()
+
+ for i in range(N):
+ assert a.grad[i] == 2 * M * min(min(N - i - 1, i + 1), M) * \
+ 2**(M - 1) * N
+ for i in range(N):
+ assert b.grad[i * 2] == min(min(N - i - 1, i + 1), M) * N
+ assert b.grad[i * 2 + 1] == min(min(N - i - 1, i + 1), M) * N
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_mixed_inner_loops():
+ x = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+ arr = ti.field(dtype=ti.f32, shape=(5))
+ loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def mixed_inner_loops():
+ for i in arr:
+ loss[None] += ti.sin(x[None])
+ for j in range(2):
+ loss[None] += ti.sin(x[None]) + 1.0
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ mixed_inner_loops()
+ mixed_inner_loops.grad()
+
+ assert loss[None] == 10.0
+ assert x.grad[None] == 15.0
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_mixed_inner_loops_tape():
+ x = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+ arr = ti.field(dtype=ti.f32, shape=(5))
+ loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def mixed_inner_loops_tape():
+ for i in arr:
+ loss[None] += ti.sin(x[None])
+ for j in range(2):
+ loss[None] += ti.sin(x[None]) + 1.0
+
+ x[None] = 0.0
+ with ti.Tape(loss=loss):
+ mixed_inner_loops_tape()
+
+ assert loss[None] == 10.0
+ assert x.grad[None] == 15.0
+
+
+@test_utils.test(require=ti.extension.adstack, ad_stack_size=32)
+def test_inner_loops_local_variable_fixed_stack_size_tape():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_inner_loops_local_variable():
+ for i in arr:
+ for j in range(3):
+ s = 0.0
+ t = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ t += ti.sin(x[None])
+ loss[None] += s + t
+
+ x[None] = 0.0
+ with ti.Tape(loss=loss):
+ test_inner_loops_local_variable()
+
+ assert loss[None] == 18.0
+ assert x.grad[None] == 36.0
+
+
+@test_utils.test(require=ti.extension.adstack, ad_stack_size=32)
+def test_inner_loops_local_variable_fixed_stack_size_kernel_grad():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_inner_loops_local_variable():
+ for i in arr:
+ for j in range(3):
+ s = 0.0
+ t = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ t += ti.sin(x[None])
+ loss[None] += s + t
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ test_inner_loops_local_variable()
+ test_inner_loops_local_variable.grad()
+
+ assert loss[None] == 18.0
+ assert x.grad[None] == 36.0
+
+
+@test_utils.test(require=ti.extension.adstack, ad_stack_size=0)
+def test_inner_loops_local_variable_adaptive_stack_size_tape():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_inner_loops_local_variable():
+ for i in arr:
+ for j in range(3):
+ s = 0.0
+ t = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ t += ti.sin(x[None])
+ loss[None] += s + t
+
+ x[None] = 0.0
+ with ti.Tape(loss=loss):
+ test_inner_loops_local_variable()
+
+ assert loss[None] == 18.0
+ assert x.grad[None] == 36.0
+
+
+@test_utils.test(require=ti.extension.adstack, ad_stack_size=0)
+def test_inner_loops_local_variable_adaptive_stack_size_kernel_grad():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_inner_loops_local_variable():
+ for i in arr:
+ for j in range(3):
+ s = 0.0
+ t = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ t += ti.sin(x[None])
+ loss[None] += s + t
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ test_inner_loops_local_variable()
+ test_inner_loops_local_variable.grad()
+
+ assert loss[None] == 18.0
+ assert x.grad[None] == 36.0
+
+
+@test_utils.test(require=ti.extension.adstack, ad_stack_size=0)
+def test_more_inner_loops_local_variable_adaptive_stack_size_tape():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_more_inner_loops_local_variable():
+ for i in arr:
+ for j in range(2):
+ s = 0.0
+ for k in range(3):
+ u = 0.0
+ s += ti.sin(x[None]) + 1.0
+ for l in range(2):
+ u += ti.sin(x[None])
+ loss[None] += u
+ loss[None] += s
+
+ x[None] = 0.0
+ with ti.Tape(loss=loss):
+ test_more_inner_loops_local_variable()
+
+ assert loss[None] == 12.0
+ assert x.grad[None] == 36.0
+
+
+@test_utils.test(require=ti.extension.adstack, ad_stack_size=32)
+def test_more_inner_loops_local_variable_fixed_stack_size_tape():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_more_inner_loops_local_variable():
+ for i in arr:
+ for j in range(2):
+ s = 0.0
+ for k in range(3):
+ u = 0.0
+ s += ti.sin(x[None]) + 1.0
+ for l in range(2):
+ u += ti.sin(x[None])
+ loss[None] += u
+ loss[None] += s
+
+ x[None] = 0.0
+ with ti.Tape(loss=loss):
+ test_more_inner_loops_local_variable()
+
+ assert loss[None] == 12.0
+ assert x.grad[None] == 36.0
+
+
+@test_utils.test(require=ti.extension.adstack,
+ ad_stack_size=32,
+ arch=[ti.cpu, ti.gpu])
+def test_stacked_inner_loops_local_variable_fixed_stack_size_kernel_grad():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_stacked_inner_loops_local_variable():
+ for i in arr:
+ loss[None] += ti.sin(x[None])
+ for j in range(3):
+ s = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ loss[None] += s
+ for j in range(3):
+ s = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ loss[None] += s
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ test_stacked_inner_loops_local_variable()
+ test_stacked_inner_loops_local_variable.grad()
+
+ assert loss[None] == 36.0
+ assert x.grad[None] == 38.0
+
+
+@test_utils.test(require=ti.extension.adstack,
+ ad_stack_size=32,
+ arch=[ti.cpu, ti.gpu])
+def test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable_fixed_stack_size_kernel_grad(
+):
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable():
+ for i in arr:
+ loss[None] += ti.sin(x[None])
+ for j in range(3):
+ for k in range(3):
+ loss[None] += ti.sin(x[None]) + 1.0
+ for j in range(3):
+ s = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ loss[None] += s
+ for j in range(3):
+ for k in range(3):
+ loss[None] += ti.sin(x[None]) + 1.0
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable()
+ test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable.grad()
+
+ assert loss[None] == 54.0
+ assert x.grad[None] == 56.0
+
+
+@test_utils.test(require=ti.extension.adstack,
+ ad_stack_size=0,
+ arch=[ti.cpu, ti.gpu])
+def test_stacked_inner_loops_local_variable_adaptive_stack_size_kernel_grad():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_stacked_inner_loops_local_variable():
+ for i in arr:
+ loss[None] += ti.sin(x[None])
+ for j in range(3):
+ s = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ loss[None] += s
+ for j in range(3):
+ s = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ loss[None] += s
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ test_stacked_inner_loops_local_variable()
+ test_stacked_inner_loops_local_variable.grad()
+
+ assert loss[None] == 36.0
+ assert x.grad[None] == 38.0
+
+
+@test_utils.test(require=ti.extension.adstack,
+ ad_stack_size=0,
+ arch=[ti.cpu, ti.gpu])
+def test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable_adaptive_stack_size_kernel_grad(
+):
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable():
+ for i in arr:
+ loss[None] += ti.sin(x[None])
+ for j in range(3):
+ for k in range(3):
+ loss[None] += ti.sin(x[None]) + 1.0
+ for j in range(3):
+ s = 0.0
+ for k in range(3):
+ s += ti.sin(x[None]) + 1.0
+ loss[None] += s
+ for j in range(3):
+ for k in range(3):
+ loss[None] += ti.sin(x[None]) + 1.0
+
+ loss.grad[None] = 1.0
+ x[None] = 0.0
+ test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable()
+ test_stacked_mixed_ib_and_non_ib_inner_loops_local_variable.grad()
+
+ assert loss[None] == 54.0
+ assert x.grad[None] == 56.0
+
+
+@test_utils.test(require=ti.extension.adstack,
+ ad_stack_size=0,
+ arch=[ti.cpu, ti.gpu])
+def test_large_for_loops_adaptive_stack_size():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_large_loop():
+ for i in range(5):
+ for j in range(2000):
+ for k in range(1000):
+ loss[None] += ti.sin(x[None]) + 1.0
+
+ with ti.Tape(loss=loss):
+ test_large_loop()
+
+ assert loss[None] == 1e7
+ assert x.grad[None] == 1e7
+
+
+@test_utils.test(require=ti.extension.adstack,
+ ad_stack_size=1,
+ arch=[ti.cpu, ti.gpu])
+def test_large_for_loops_fixed_stack_size():
+ x = ti.field(dtype=float, shape=(), needs_grad=True)
+ arr = ti.field(dtype=float, shape=(2), needs_grad=True)
+ loss = ti.field(dtype=float, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def test_large_loop():
+ for i in range(5):
+ for j in range(2000):
+ for k in range(1000):
+ loss[None] += ti.sin(x[None]) + 1.0
+
+ with ti.Tape(loss=loss):
+ test_large_loop()
+
+ assert loss[None] == 1e7
+ assert x.grad[None] == 1e7
diff --git a/tests/_python_orig/test_ad_if.py b/tests/_python_orig/test_ad_if.py
new file mode 100644
index 000000000..b76ef6028
--- /dev/null
+++ b/tests/_python_orig/test_ad_if.py
@@ -0,0 +1,244 @@
+from taichi.lang import impl
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_if_simple():
+ x = ti.field(ti.f32, shape=())
+ y = ti.field(ti.f32, shape=())
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ if x[None] > 0.:
+ y[None] = x[None]
+
+ x[None] = 1
+ y.grad[None] = 1
+
+ func()
+ func.grad()
+
+ assert x.grad[None] == 1
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_if():
+ x = ti.field(ti.f32, shape=2)
+ y = ti.field(ti.f32, shape=2)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(i: ti.i32):
+ if x[i] > 0:
+ y[i] = x[i]
+ else:
+ y[i] = 2 * x[i]
+
+ x[0] = 0
+ x[1] = 1
+ y.grad[0] = 1
+ y.grad[1] = 1
+
+ func(0)
+ func.grad(0)
+ func(1)
+ func.grad(1)
+
+ assert x.grad[0] == 2
+ assert x.grad[1] == 1
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_if_nested():
+ n = 20
+ x = ti.field(ti.f32, shape=n)
+ y = ti.field(ti.f32, shape=n)
+ z = ti.field(ti.f32, shape=n)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in x:
+ if x[i] < 2:
+ if x[i] == 0:
+ y[i] = 0
+ else:
+ y[i] = z[i] * 1
+ else:
+ if x[i] == 2:
+ y[i] = z[i] * 2
+ else:
+ y[i] = z[i] * 3
+
+ z.fill(1)
+
+ for i in range(n):
+ x[i] = i % 4
+
+ func()
+ for i in range(n):
+ assert y[i] == i % 4
+ y.grad[i] = 1
+ func.grad()
+
+ for i in range(n):
+ assert z.grad[i] == i % 4
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_if_mutable():
+ x = ti.field(ti.f32, shape=2)
+ y = ti.field(ti.f32, shape=2)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(i: ti.i32):
+ t = x[i]
+ if t > 0:
+ y[i] = t
+ else:
+ y[i] = 2 * t
+
+ x[0] = 0
+ x[1] = 1
+ y.grad[0] = 1
+ y.grad[1] = 1
+
+ func(0)
+ func.grad(0)
+ func(1)
+ func.grad(1)
+
+ assert x.grad[0] == 2
+ assert x.grad[1] == 1
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_if_parallel():
+ x = ti.field(ti.f32, shape=2)
+ y = ti.field(ti.f32, shape=2)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in range(2):
+ t = x[i]
+ if t > 0:
+ y[i] = t
+ else:
+ y[i] = 2 * t
+
+ x[0] = 0
+ x[1] = 1
+ y.grad[0] = 1
+ y.grad[1] = 1
+
+ func()
+ func.grad()
+
+ assert x.grad[0] == 2
+ assert x.grad[1] == 1
+
+
+@test_utils.test(require=[ti.extension.adstack, ti.extension.data64],
+ default_fp=ti.f64)
+def test_ad_if_parallel_f64():
+ x = ti.field(ti.f64, shape=2)
+ y = ti.field(ti.f64, shape=2)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in range(2):
+ t = x[i]
+ if t > 0:
+ y[i] = t
+ else:
+ y[i] = 2 * t
+
+ x[0] = 0
+ x[1] = 1
+ y.grad[0] = 1
+ y.grad[1] = 1
+
+ func()
+ func.grad()
+
+ assert x.grad[0] == 2
+ assert x.grad[1] == 1
+
+
+@test_utils.test(require=ti.extension.adstack)
+def test_ad_if_parallel_complex():
+ x = ti.field(ti.f32, shape=2)
+ y = ti.field(ti.f32, shape=2)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ ti.parallelize(1)
+ for i in range(2):
+ t = 0.0
+ if x[i] > 0:
+ t = 1 / x[i]
+ y[i] = t
+
+ x[0] = 0
+ x[1] = 2
+ y.grad[0] = 1
+ y.grad[1] = 1
+
+ func()
+ func.grad()
+
+ assert x.grad[0] == 0
+ assert x.grad[1] == -0.25
+
+
+@test_utils.test(require=[ti.extension.adstack, ti.extension.data64],
+ default_fp=ti.f64)
+def test_ad_if_parallel_complex_f64():
+ x = ti.field(ti.f64, shape=2)
+ y = ti.field(ti.f64, shape=2)
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ ti.parallelize(1)
+ for i in range(2):
+ t = 0.0
+ if x[i] > 0:
+ t = 1 / x[i]
+ y[i] = t
+
+ x[0] = 0
+ x[1] = 2
+ y.grad[0] = 1
+ y.grad[1] = 1
+
+ func()
+ func.grad()
+
+ assert x.grad[0] == 0
+ assert x.grad[1] == -0.25
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_stack():
+ @ti.kernel
+ def func():
+ impl.call_internal("test_stack")
+
+ func()
diff --git a/tests/_python_orig/test_ad_offload.py b/tests/_python_orig/test_ad_offload.py
new file mode 100644
index 000000000..945dba9c8
--- /dev/null
+++ b/tests/_python_orig/test_ad_offload.py
@@ -0,0 +1,24 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_offload_order():
+ n = 128
+ x = ti.field(ti.f32, shape=n, needs_grad=True)
+ y = ti.field(ti.f32, shape=n, needs_grad=True)
+ z = ti.field(ti.f32, shape=(), needs_grad=True)
+
+ @ti.kernel
+ def forward():
+ for i in x:
+ y[i] = x[i]
+
+ # for i in x:
+ # z[None] += y[i]
+
+ with ti.Tape(z):
+ forward()
+
+ # for i in range(n):
+ # assert x.grad[i] == 1
diff --git a/tests/_python_orig/test_aot.py b/tests/_python_orig/test_aot.py
new file mode 100644
index 000000000..4352c5f66
--- /dev/null
+++ b/tests/_python_orig/test_aot.py
@@ -0,0 +1,562 @@
+import json
+import os
+import sys
+import tempfile
+
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cc)
+def test_record():
+ with tempfile.TemporaryDirectory() as tmpdir:
+ recorded_file = os.path.join(tmpdir, 'record.yml')
+ ti.aot.start_recording(recorded_file)
+
+ loss = ti.field(float, (), needs_grad=True)
+ x = ti.field(float, 233, needs_grad=True)
+
+ @ti.kernel
+ def compute_loss():
+ for i in x:
+ loss[None] += x[i]**2
+
+ compute_loss()
+ ti.aot.stop_recording()
+
+ assert os.path.exists(recorded_file)
+
+ # Make sure kernel info is in the file
+ with open(recorded_file, 'r') as f:
+ assert 'compute_loss' in ''.join(f.readlines())
+
+
+@test_utils.test(arch=ti.opengl, max_block_dim=32)
+def test_opengl_max_block_dim():
+ density = ti.field(float, shape=(8, 8))
+
+ @ti.kernel
+ def init():
+ for i, j in density:
+ density[i, j] = 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ m = ti.aot.Module(ti.opengl)
+ m.add_field('density', density)
+ m.add_kernel(init)
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ res = json.load(json_file)
+ gl_file_path = res['aot_data']['kernels']['init']['tasks'][0][
+ 'source_path']
+ with open(gl_file_path) as gl_file:
+ s = 'layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n'
+ assert s in gl_file.readlines()
+
+
+@test_utils.test(arch=[ti.opengl, ti.vulkan])
+def test_aot_field_range_hint():
+ density = ti.field(float, shape=(8, 8))
+
+ @ti.kernel
+ def init():
+ for i, j in density:
+ density[i, j] = 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ m = ti.aot.Module(ti.opengl)
+ m.add_field('density', density)
+ m.add_kernel(init)
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ res = json.load(json_file)
+ range_hint = res['aot_data']['kernels']['init']['tasks'][0][
+ 'range_hint']
+ assert range_hint == '64'
+
+
+@test_utils.test(arch=ti.opengl)
+def test_aot_ndarray_range_hint():
+ density = ti.ndarray(dtype=ti.f32, shape=(8, 8))
+
+ @ti.kernel
+ def init(density: ti.any_arr()):
+ for i, j in density:
+ density[i, j] = 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ m = ti.aot.Module(ti.opengl)
+ m.add_kernel(init, (density, ))
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ res = json.load(json_file)
+ range_hint = res['aot_data']['kernels']['init']['tasks'][0][
+ 'range_hint']
+ assert range_hint == 'arg 0'
+
+
+@test_utils.test(arch=ti.opengl)
+def test_element_size_alignment():
+ a = ti.field(ti.f32, shape=())
+ b = ti.Matrix.field(2, 3, ti.f32, shape=(2, 4))
+ c = ti.field(ti.i32, shape=())
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ s = ti.aot.Module(ti.lang.impl.current_cfg().arch)
+ s.add_field('a', a)
+ s.add_field('b', b)
+ s.add_field('c', c)
+ s.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ res = json.load(json_file)
+ offsets = (res['aot_data']['fields'][0]['mem_offset_in_parent'],
+ res['aot_data']['fields'][1]['mem_offset_in_parent'],
+ res['aot_data']['fields'][2]['mem_offset_in_parent'])
+ assert 0 in offsets and 4 in offsets and 24 in offsets
+ assert res['aot_data']['root_buffer_size'] == 216
+
+
+@test_utils.test(arch=[ti.opengl, ti.vulkan])
+def test_save():
+ density = ti.field(float, shape=(4, 4))
+
+ @ti.kernel
+ def init():
+ for i, j in density:
+ density[i, j] = 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+        # note: ti.aot.Module(ti.opengl) is a no-op according to its docstring.
+ m = ti.aot.Module(ti.lang.impl.current_cfg().arch)
+ m.add_field('density', density)
+ m.add_kernel(init)
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ json.load(json_file)
+
+
+@test_utils.test(arch=ti.opengl)
+def test_save_template_kernel():
+ density = ti.field(float, shape=(4, 4))
+
+ @ti.kernel
+ def foo(n: ti.template()):
+ for i in range(n):
+ density[0, 0] += 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+        # note: ti.aot.Module(ti.opengl) is a no-op according to its docstring.
+ m = ti.aot.Module(ti.lang.impl.current_cfg().arch)
+ m.add_field('density', density)
+ with m.add_kernel_template(foo) as kt:
+ kt.instantiate(n=6)
+ kt.instantiate(n=8)
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ json.load(json_file)
+
+
+@test_utils.test(arch=[ti.opengl, ti.vulkan])
+def test_non_dense_snode():
+ n = 8
+ x = ti.field(dtype=ti.f32)
+ y = ti.field(dtype=ti.f32)
+ blk = ti.root.dense(ti.i, n)
+ blk.place(x)
+ blk.dense(ti.i, n).place(y)
+
+ with pytest.raises(RuntimeError, match='AOT: only supports dense field'):
+ m = ti.aot.Module(ti.lang.impl.current_cfg().arch)
+ m.add_field('x', x)
+ m.add_field('y', y)
+
+
+@test_utils.test(arch=[ti.opengl, ti.vulkan])
+def test_mpm88_aot():
+ n_particles = 8192
+ n_grid = 128
+ dx = 1 / n_grid
+ dt = 2e-4
+
+ p_rho = 1
+ p_vol = (dx * 0.5)**2
+ p_mass = p_vol * p_rho
+ gravity = 9.8
+ bound = 3
+ E = 400
+
+ x = ti.Vector.field(2, float, n_particles)
+ v = ti.Vector.field(2, float, n_particles)
+ C = ti.Matrix.field(2, 2, float, n_particles)
+ J = ti.field(float, n_particles)
+
+ grid_v = ti.Vector.field(2, float, (n_grid, n_grid))
+ grid_m = ti.field(float, (n_grid, n_grid))
+
+ @ti.kernel
+ def substep():
+ for i, j in grid_m:
+ grid_v[i, j] = [0, 0]
+ grid_m[i, j] = 0
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * 4 * E * p_vol * (J[p] - 1) / dx**2
+ affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset - fx) * dx
+ weight = w[i].x * w[j].y
+ grid_v[base +
+ offset] += weight * (p_mass * v[p] + affine @ dpos)
+ grid_m[base + offset] += weight * p_mass
+ for i, j in grid_m:
+ if grid_m[i, j] > 0:
+ grid_v[i, j] /= grid_m[i, j]
+ grid_v[i, j].y -= dt * gravity
+ if i < bound and grid_v[i, j].x < 0:
+ grid_v[i, j].x = 0
+ if i > n_grid - bound and grid_v[i, j].x > 0:
+ grid_v[i, j].x = 0
+ if j < bound and grid_v[i, j].y < 0:
+ grid_v[i, j].y = 0
+ if j > n_grid - bound and grid_v[i, j].y > 0:
+ grid_v[i, j].y = 0
+ for p in x:
+ Xp = x[p] / dx
+ base = int(Xp - 0.5)
+ fx = Xp - base
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ new_v = ti.Vector.zero(float, 2)
+ new_C = ti.Matrix.zero(float, 2, 2)
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset - fx) * dx
+ weight = w[i].x * w[j].y
+ g_v = grid_v[base + offset]
+ new_v += weight * g_v
+ new_C += 4 * weight * g_v.outer_product(dpos) / dx**2
+ v[p] = new_v
+ x[p] += dt * v[p]
+ J[p] *= 1 + dt * new_C.trace()
+ C[p] = new_C
+
+ @ti.kernel
+ def init():
+ for i in range(n_particles):
+ x[i] = [ti.random() * 0.4 + 0.2, ti.random() * 0.4 + 0.2]
+ v[i] = [0, -1]
+ J[i] = 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ m = ti.aot.Module(ti.lang.impl.current_cfg().arch)
+ m.add_field("x", x)
+ m.add_field("v", v)
+ m.add_field("C", C)
+ m.add_field("J", J)
+ m.add_field("grid_v", grid_v)
+ m.add_field("grid_m", grid_m)
+ m.add_kernel(substep)
+ m.add_kernel(init)
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ json.load(json_file)
+
+
+@test_utils.test(arch=ti.opengl)
+def test_opengl_8_ssbo():
+ # 6 ndarrays + gtmp + args
+ n = 4
+ density1 = ti.ndarray(dtype=ti.f32, shape=(4, 4))
+ density2 = ti.ndarray(dtype=ti.f32, shape=(4, 4))
+ density3 = ti.ndarray(dtype=ti.f32, shape=(4, 4))
+ density4 = ti.ndarray(dtype=ti.f32, shape=(4, 4))
+ density5 = ti.ndarray(dtype=ti.f32, shape=(4, 4))
+ density6 = ti.ndarray(dtype=ti.f32, shape=(4, 4))
+
+ @ti.kernel
+ def init(d: ti.i32, density1: ti.any_arr(), density2: ti.any_arr(),
+ density3: ti.any_arr(), density4: ti.any_arr(),
+ density5: ti.any_arr(), density6: ti.any_arr()):
+ for i, j in density1:
+ density1[i, j] = d + 1
+ density2[i, j] = d + 2
+ density3[i, j] = d + 3
+ density4[i, j] = d + 4
+ density5[i, j] = d + 5
+ density6[i, j] = d + 6
+
+ init(0, density1, density2, density3, density4, density5, density6)
+ assert (density1.to_numpy() == (np.zeros(shape=(n, n)) + 1)).all()
+ assert (density2.to_numpy() == (np.zeros(shape=(n, n)) + 2)).all()
+ assert (density3.to_numpy() == (np.zeros(shape=(n, n)) + 3)).all()
+ assert (density4.to_numpy() == (np.zeros(shape=(n, n)) + 4)).all()
+ assert (density5.to_numpy() == (np.zeros(shape=(n, n)) + 5)).all()
+ assert (density6.to_numpy() == (np.zeros(shape=(n, n)) + 6)).all()
+
+
+@test_utils.test(arch=ti.opengl)
+def test_opengl_exceed_max_ssbo():
+ # 8 ndarrays + args > 8 (maximum allowed)
+ n = 4
+ density1 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density2 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density3 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density4 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density5 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density6 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density7 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+ density8 = ti.ndarray(dtype=ti.f32, shape=(n, n))
+
+ @ti.kernel
+ def init(d: ti.i32, density1: ti.any_arr(), density2: ti.any_arr(),
+ density3: ti.any_arr(), density4: ti.any_arr(),
+ density5: ti.any_arr(), density6: ti.any_arr(),
+ density7: ti.any_arr(), density8: ti.any_arr()):
+ for i, j in density1:
+ density1[i, j] = d + 1
+ density2[i, j] = d + 2
+ density3[i, j] = d + 3
+ density4[i, j] = d + 4
+ density5[i, j] = d + 5
+ density6[i, j] = d + 6
+ density7[i, j] = d + 7
+ density8[i, j] = d + 8
+
+ with pytest.raises(RuntimeError):
+ init(0, density1, density2, density3, density4, density5, density6,
+ density7, density8)
+
+
+@test_utils.test(arch=[ti.opengl, ti.vulkan])
+def test_mpm99_aot():
+ quality = 1 # Use a larger value for higher-res simulations
+ n_particles, n_grid = 9000 * quality**2, 128 * quality
+ dx, inv_dx = 1 / n_grid, float(n_grid)
+ dt = 1e-4 / quality
+ p_vol, p_rho = (dx * 0.5)**2, 1
+ p_mass = p_vol * p_rho
+ E, nu = 0.1e4, 0.2 # Young's modulus and Poisson's ratio
+ mu_0, lambda_0 = E / (2 * (1 + nu)), E * nu / (
+ (1 + nu) * (1 - 2 * nu)) # Lame parameters
+ x = ti.Vector.field(2, dtype=float, shape=n_particles) # position
+ v = ti.Vector.field(2, dtype=float, shape=n_particles) # velocity
+ C = ti.Matrix.field(2, 2, dtype=float,
+ shape=n_particles) # affine velocity field
+ F = ti.Matrix.field(2, 2, dtype=float,
+ shape=n_particles) # deformation gradient
+ material = ti.field(dtype=int, shape=n_particles) # material id
+ Jp = ti.field(dtype=float, shape=n_particles) # plastic deformation
+ grid_v = ti.Vector.field(2, dtype=float,
+ shape=(n_grid,
+ n_grid)) # grid node momentum/velocity
+ grid_m = ti.field(dtype=float, shape=(n_grid, n_grid)) # grid node mass
+ grid_v_int = ti.Vector.field(2, dtype=int,
+ shape=(n_grid,
+ n_grid)) # grid node momentum/velocity
+ grid_m_int = ti.field(dtype=int, shape=(n_grid, n_grid)) # grid node mass
+
+ v_exp = 24
+ m_exp = 40
+
+ @ti.kernel
+ def substep():
+ for i, j in grid_m:
+ grid_v[i, j] = [0, 0]
+ grid_m[i, j] = 0
+ grid_v_int[i, j] = [0, 0]
+ grid_m_int[i, j] = 0
+ for p in x: # Particle state update and scatter to grid (P2G)
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ # Quadratic kernels [http://mpm.graphics Eqn. 123, with x=fx, fx-1,fx-2]
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ F[p] = (ti.Matrix.identity(float, 2) +
+ dt * C[p]) @ F[p] # deformation gradient update
+ h = ti.exp(
+ 10 * (1.0 - Jp[p])
+ ) # Hardening coefficient: snow gets harder when compressed
+ if material[p] == 1: # jelly, make it softer
+ h = 0.3
+ mu, la = mu_0 * h, lambda_0 * h
+ if material[p] == 0: # liquid
+ mu = 0.0
+ U, sig, V = ti.svd(F[p])
+ J = 1.0
+ for d in ti.static(range(2)):
+ new_sig = sig[d, d]
+ if material[p] == 2: # Snow
+ new_sig = min(max(sig[d, d], 1 - 2.5e-2),
+ 1 + 4.5e-3) # Plasticity
+ Jp[p] *= sig[d, d] / new_sig
+ sig[d, d] = new_sig
+ J *= new_sig
+ if material[
+ p] == 0: # Reset deformation gradient to avoid numerical instability
+ F[p] = ti.Matrix.identity(float, 2) * ti.sqrt(J)
+ elif material[p] == 2:
+ F[p] = U @ sig @ V.transpose(
+ ) # Reconstruct elastic deformation gradient after plasticity
+ stress = 2 * mu * (F[p] - U @ V.transpose()) @ F[p].transpose(
+ ) + ti.Matrix.identity(float, 2) * la * J * (J - 1)
+ stress = (-dt * p_vol * 4 * inv_dx * inv_dx) * stress
+ affine = stress + p_mass * C[p]
+ for i, j in ti.static(ti.ndrange(
+ 3, 3)): # Loop over 3x3 grid node neighborhood
+ offset = ti.Vector([i, j])
+ dpos = (offset.cast(float) - fx) * dx
+ weight = w[i][0] * w[j][1]
+ grid_v_int[base + offset] += int(
+ ti.floor(0.5 + weight * (p_mass * v[p] + affine @ dpos) *
+ (2.0**v_exp)))
+ grid_m_int[base + offset] += int(
+ ti.floor(0.5 + weight * p_mass * (2.0**m_exp)))
+ for i, j in grid_m:
+ if grid_m_int[i, j] > 0: # No need for epsilon here
+ # grid_v[i, j] = (1.0 / grid_m[i, j]) * grid_v[i, j] # Momentum to velocity
+ grid_v[i, j] = (2**(m_exp - v_exp) / grid_m_int[i, j]
+ ) * grid_v_int[i, j] # Momentum to velocity
+ grid_v[i, j][1] -= dt * 50 # gravity
+ if i < 3 and grid_v[i, j][0] < 0:
+ grid_v[i, j][0] = 0 # Boundary conditions
+ if i > n_grid - 3 and grid_v[i, j][0] > 0: grid_v[i, j][0] = 0
+ if j < 3 and grid_v[i, j][1] < 0: grid_v[i, j][1] = 0
+ if j > n_grid - 3 and grid_v[i, j][1] > 0: grid_v[i, j][1] = 0
+ for p in x: # grid to particle (G2P)
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [
+ 0.5 * (1.5 - fx)**2, 0.75 - (fx - 1.0)**2, 0.5 * (fx - 0.5)**2
+ ]
+ new_v = ti.Vector.zero(float, 2)
+ new_C = ti.Matrix.zero(float, 2, 2)
+ for i, j in ti.static(ti.ndrange(
+ 3, 3)): # loop over 3x3 grid node neighborhood
+ dpos = ti.Vector([i, j]).cast(float) - fx
+ g_v = grid_v[base + ti.Vector([i, j])]
+ weight = w[i][0] * w[j][1]
+ new_v += weight * g_v
+ new_C += 4 * inv_dx * weight * g_v.outer_product(dpos)
+ v[p], C[p] = new_v, new_C
+ x[p] += dt * v[p] # advection
+
+ group_size = n_particles // 3
+
+ @ti.kernel
+ def initialize():
+ for i in range(n_particles):
+ x[i] = [
+ ti.random() * 0.2 + 0.3 + 0.10 * (i // group_size),
+ ti.random() * 0.2 + 0.05 + 0.32 * (i // group_size)
+ ]
+ material[i] = i // group_size # 0: fluid 1: jelly 2: snow
+ v[i] = ti.Matrix([0, 0])
+ F[i] = ti.Matrix([[1, 0], [0, 1]])
+ Jp[i] = 1
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ m = ti.aot.Module(ti.lang.impl.current_cfg().arch)
+ m.add_field('x', x)
+ m.add_field('v', v)
+ m.add_field('C', C)
+ m.add_field('J', Jp)
+ m.add_field('grid_v', grid_v)
+ m.add_field('grid_m', grid_m)
+ m.add_field('grid_v_int', grid_v_int)
+ m.add_field('grid_m_int', grid_m_int)
+ m.add_field('material', material)
+ m.add_kernel(initialize)
+ m.add_kernel(substep)
+
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ json.load(json_file)
+
+
+@test_utils.test(arch=ti.opengl)
+def test_mpm88_ndarray():
+ dim = 2
+ N = 64
+ n_particles = N * N
+ n_grid = 128
+ dx = 1 / n_grid
+ inv_dx = 1 / dx
+ dt = 2.0e-4
+ p_vol = (dx * 0.5)**2
+ p_rho = 1
+ p_mass = p_vol * p_rho
+ E = 400
+
+ @ti.kernel
+ def substep(x: ti.any_arr(element_dim=1), v: ti.any_arr(element_dim=1),
+ C: ti.any_arr(element_dim=2), J: ti.any_arr(),
+ grid_v: ti.any_arr(element_dim=1), grid_m: ti.any_arr()):
+ for p in x:
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * p_vol * (J[p] - 1) * 4 * inv_dx * inv_dx * E
+ affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
+ for i in ti.static(range(3)):
+ for j in ti.static(range(3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset.cast(float) - fx) * dx
+ weight = w[i][0] * w[j][1]
+ ti.atomic_add(grid_v[base + offset],
+ weight * (p_mass * v[p] + affine @ dpos))
+ ti.atomic_add(grid_m[base + offset], weight * p_mass)
+
+ for i, j in grid_m:
+ if grid_m[i, j] > 0:
+ bound = 3
+ inv_m = 1 / grid_m[i, j]
+ grid_v[i, j] = inv_m * grid_v[i, j]
+ grid_v[i, j][1] -= dt * 9.8
+ if i < bound and grid_v[i, j][0] < 0:
+ grid_v[i, j][0] = 0
+ if i > n_grid - bound and grid_v[i, j][0] > 0:
+ grid_v[i, j][0] = 0
+ if j < bound and grid_v[i, j][1] < 0:
+ grid_v[i, j][1] = 0
+ if j > n_grid - bound and grid_v[i, j][1] > 0:
+ grid_v[i, j][1] = 0
+
+ for p in x:
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [
+ 0.5 * (1.5 - fx)**2, 0.75 - (fx - 1.0)**2, 0.5 * (fx - 0.5)**2
+ ]
+ new_v = ti.Vector.zero(ti.f32, 2)
+ new_C = ti.Matrix.zero(ti.f32, 2, 2)
+ for i in ti.static(range(3)):
+ for j in ti.static(range(3)):
+ dpos = ti.Vector([i, j]).cast(float) - fx
+ g_v = grid_v[base + ti.Vector([i, j])]
+ weight = w[i][0] * w[j][1]
+ new_v += weight * g_v
+ new_C += 4 * weight * g_v.outer_product(dpos) * inv_dx
+ v[p] = new_v
+ x[p] += dt * v[p]
+ J[p] *= 1 + dt * new_C.trace()
+ C[p] = new_C
+
+ x = ti.Vector.ndarray(dim, ti.f32, n_particles)
+ v = ti.Vector.ndarray(dim, ti.f32, n_particles)
+ C = ti.Matrix.ndarray(dim, dim, ti.f32, n_particles)
+ J = ti.ndarray(ti.f32, n_particles)
+ grid_v = ti.Vector.ndarray(dim, ti.f32, (n_grid, n_grid))
+ grid_m = ti.ndarray(ti.f32, (n_grid, n_grid))
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ m = ti.aot.Module(ti.opengl)
+ m.add_kernel(substep, (x, v, C, J, grid_v, grid_m))
+
+ m.save(tmpdir, '')
+ with open(os.path.join(tmpdir, 'metadata.json')) as json_file:
+ json.load(json_file)
diff --git a/tests/_python_orig/test_arg_alignment.py b/tests/_python_orig/test_arg_alignment.py
new file mode 100644
index 000000000..4320f219e
--- /dev/null
+++ b/tests/_python_orig/test_arg_alignment.py
@@ -0,0 +1,23 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.opengl])
+def test_ret_write():
+ @ti.kernel
+ def func(a: ti.i16) -> ti.f32:
+ return 3.0
+
+ assert func(255) == 3.0
+
+
+@test_utils.test(exclude=[ti.opengl])
+def test_arg_read():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func(a: ti.i8, b: ti.i32):
+ x[None] = b
+
+ func(255, 2)
+ assert x[None] == 2
diff --git a/tests/_python_orig/test_arg_check.py b/tests/_python_orig/test_arg_check.py
new file mode 100644
index 000000000..35bd2d5eb
--- /dev/null
+++ b/tests/_python_orig/test_arg_check.py
@@ -0,0 +1,40 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_argument_error():
+ x = ti.field(ti.i32)
+
+ ti.root.place(x)
+
+ try:
+
+ @ti.kernel
+ def set_i32_notype(v):
+ pass
+ except ti.TaichiSyntaxError:
+ pass
+
+ try:
+
+ @ti.kernel
+ def set_i32_args(*args):
+ pass
+ except ti.TaichiSyntaxError:
+ pass
+
+ try:
+
+ @ti.kernel
+ def set_i32_kwargs(**kwargs):
+ pass
+ except ti.TaichiSyntaxError:
+ pass
+
+ @ti.kernel
+ def set_i32(v: ti.i32):
+ x[None] = v
+
+ set_i32(123)
+ assert x[None] == 123
diff --git a/tests/_python_orig/test_arg_load.py b/tests/_python_orig/test_arg_load.py
new file mode 100644
index 000000000..427b1a64c
--- /dev/null
+++ b/tests/_python_orig/test_arg_load.py
@@ -0,0 +1,71 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_arg_load():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.f32)
+
+ ti.root.place(x, y)
+
+ @ti.kernel
+ def set_i32(v: ti.i32):
+ x[None] = v
+
+ @ti.kernel
+ def set_f32(v: ti.f32):
+ y[None] = v
+
+ set_i32(123)
+ assert x[None] == 123
+
+ set_i32(456)
+ assert x[None] == 456
+
+ set_f32(0.125)
+ assert y[None] == 0.125
+
+ set_f32(1.5)
+ assert y[None] == 1.5
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_arg_load_f64():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.f32)
+
+ ti.root.place(x, y)
+
+ @ti.kernel
+ def set_f64(v: ti.f64):
+ y[None] = ti.cast(v, ti.f32)
+
+ @ti.kernel
+ def set_i64(v: ti.i64):
+ y[None] = v
+
+ set_i64(789)
+ assert y[None] == 789
+
+ set_f64(2.5)
+ assert y[None] == 2.5
+
+
+@test_utils.test()
+def test_ext_arr():
+ N = 128
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, N).place(x)
+
+ @ti.kernel
+ def set_f32(v: ti.ext_arr()):
+ for i in range(N):
+ x[i] = v[i] + i
+
+ import numpy as np
+ v = np.ones((N, ), dtype=np.float32) * 10
+ set_f32(v)
+ for i in range(N):
+ assert x[i] == 10 + i
diff --git a/tests/_python_orig/test_assert.py b/tests/_python_orig/test_assert.py
new file mode 100644
index 000000000..dd18b6d12
--- /dev/null
+++ b/tests/_python_orig/test_assert.py
@@ -0,0 +1,124 @@
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_minimal():
+ @ti.kernel
+ def func():
+ assert 0
+
+ @ti.kernel
+ def func2():
+ assert False
+
+ with pytest.raises(RuntimeError):
+ func()
+ with pytest.raises(RuntimeError):
+ func2()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_basic():
+ @ti.kernel
+ def func():
+ x = 20
+ assert 10 <= x < 20
+
+ with pytest.raises(RuntimeError):
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_message():
+ @ti.kernel
+ def func():
+ x = 20
+ assert 10 <= x < 20, 'Foo bar'
+
+ with pytest.raises(RuntimeError, match='Foo bar'):
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_message_formatted():
+ x = ti.field(dtype=int, shape=16)
+ x[10] = 42
+
+ @ti.kernel
+ def assert_formatted():
+ for i in x:
+ assert x[i] == 0, 'x[%d] expect=%d got=%d' % (i, 0, x[i])
+
+ @ti.kernel
+ def assert_float():
+ y = 0.5
+ assert y < 0, 'y = %f' % y
+
+ with pytest.raises(RuntimeError, match=r'x\[10\] expect=0 got=42'):
+ assert_formatted()
+    # TODO: the runtime is not yet able to fully recover from
+    # assertion failures...
+ with pytest.raises(RuntimeError, match=r'y = 0.5'):
+ assert_float()
+
+ # success case
+ x[10] = 0
+ assert_formatted()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_ok():
+ @ti.kernel
+ def func():
+ x = 20
+ assert 10 <= x <= 20
+
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_static_assert_is_static():
+ @ti.kernel
+ def func():
+ x = 0
+ ti.static_assert(x) # Expr is not None
+
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_static_assert_message():
+ x = 3
+
+ @ti.kernel
+ def func():
+ ti.static_assert(x == 4, "Oh, no!")
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_static_assert_vector_n_ok():
+ x = ti.Vector.field(4, ti.f32, ())
+
+ @ti.kernel
+ def func():
+ ti.static_assert(x.n == 4)
+
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_static_assert_data_type_ok():
+ x = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ ti.static_assert(x.dtype == ti.f32)
+
+ func()
diff --git a/tests/_python_orig/test_assign.py b/tests/_python_orig/test_assign.py
new file mode 100644
index 000000000..182f4bdfb
--- /dev/null
+++ b/tests/_python_orig/test_assign.py
@@ -0,0 +1,84 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(debug=True)
+def test_assign_basic():
+ @ti.kernel
+ def func_basic():
+ a = 1
+ assert a == 1
+
+ func_basic()
+
+
+@test_utils.test(debug=True)
+def test_assign_unpack():
+ @ti.kernel
+ def func_unpack():
+ (a, b) = (1, 2)
+ assert a == 1
+ assert b == 2
+
+ func_unpack()
+
+
+@test_utils.test(debug=True)
+def test_assign_chained():
+ @ti.kernel
+ def func_chained():
+ a = b = 1
+ assert a == 1
+ assert b == 1
+
+ func_chained()
+
+
+@test_utils.test(debug=True)
+def test_assign_chained_unpack():
+ @ti.kernel
+ def func_chained_unpack():
+ (a, b) = (c, d) = (1, 2)
+ assert a == 1
+ assert b == 2
+ assert c == 1
+ assert d == 2
+
+ func_chained_unpack()
+
+
+@test_utils.test(debug=True)
+def test_assign_assign():
+ @ti.kernel
+ def func_assign():
+ a = 0
+ a = 1
+ assert a == 1
+
+ func_assign()
+
+
+@test_utils.test(debug=True)
+def test_assign_ann():
+ @ti.kernel
+ def func_ann():
+ a: ti.i32 = 1
+ b: ti.f32 = a
+ assert a == 1
+ assert b == 1.0
+
+ func_ann()
+
+
+@test_utils.test()
+def test_assign_ann_over():
+ @ti.kernel
+ def func_ann_over():
+ my_int = ti.i32
+ d: my_int = 2
+ d: ti.f32 = 2.0
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func_ann_over()
diff --git a/tests/_python_orig/test_ast_refactor.py b/tests/_python_orig/test_ast_refactor.py
new file mode 100644
index 000000000..448568dd3
--- /dev/null
+++ b/tests/_python_orig/test_ast_refactor.py
@@ -0,0 +1,1015 @@
+import numpy as np
+import pytest
+from taichi.lang import impl
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_binop():
+ @ti.kernel
+ def foo(x: ti.i32, y: ti.i32, a: ti.template()):
+ a[0] = x + y
+ a[1] = x - y
+ a[2] = x * y
+ a[3] = impl.ti_float(x) / y
+ a[4] = x // y
+ a[5] = x % y
+ a[6] = x**y
+ a[7] = x << y
+ a[8] = x >> y
+ a[9] = x | y
+ a[10] = x ^ y
+ a[11] = x & y
+
+ x = 37
+ y = 3
+ a = ti.field(ti.f32, shape=(12, ))
+ b = ti.field(ti.f32, shape=(12, ))
+
+ a[0] = x + y
+ a[1] = x - y
+ a[2] = x * y
+ a[3] = x / y
+ a[4] = x // y
+ a[5] = x % y
+ a[6] = x**y
+ a[7] = x << y
+ a[8] = x >> y
+ a[9] = x | y
+ a[10] = x ^ y
+ a[11] = x & y
+
+ foo(x, y, b)
+
+ for i in range(12):
+ assert a[i] == test_utils.approx(b[i])
+
+
+@test_utils.test()
+def test_augassign():
+ @ti.kernel
+ def foo(x: ti.i32, y: ti.i32, a: ti.template(), b: ti.template()):
+ for i in a:
+ a[i] = x
+ a[0] += y
+ a[1] -= y
+ a[2] *= y
+ a[3] //= y
+ a[4] %= y
+ a[5] **= y
+ a[6] <<= y
+ a[7] >>= y
+ a[8] |= y
+ a[9] ^= y
+ a[10] &= y
+ b[0] = x
+ b[0] /= y
+
+ x = 37
+ y = 3
+ a = ti.field(ti.i32, shape=(11, ))
+ b = ti.field(ti.i32, shape=(11, ))
+ c = ti.field(ti.f32, shape=(1, ))
+ d = ti.field(ti.f32, shape=(1, ))
+
+ a[0] = x + y
+ a[1] = x - y
+ a[2] = x * y
+ a[3] = x // y
+ a[4] = x % y
+ a[5] = x**y
+ a[6] = x << y
+ a[7] = x >> y
+ a[8] = x | y
+ a[9] = x ^ y
+ a[10] = x & y
+ c[0] = x / y
+
+ foo(x, y, b, d)
+
+ for i in range(11):
+ assert a[i] == b[i]
+ assert c[0] == test_utils.approx(d[0])
+
+
+@test_utils.test()
+def test_unaryop():
+ @ti.kernel
+ def foo(x: ti.i32, a: ti.template()):
+ a[0] = +x
+ a[1] = -x
+ a[2] = not x
+ a[3] = ~x
+
+ x = 1234
+ a = ti.field(ti.i32, shape=(4, ))
+ b = ti.field(ti.i32, shape=(4, ))
+
+ a[0] = +x
+ a[1] = -x
+ a[2] = not x
+ a[3] = ~x
+
+ foo(x, b)
+
+ for i in range(4):
+ assert a[i] == b[i]
+
+
+@test_utils.test()
+def test_boolop():
+ @ti.kernel
+ def foo(a: ti.template()):
+ a[0] = 0 and 0
+ a[1] = 0 and 1
+ a[2] = 1 and 0
+ a[3] = 1 and 1
+ a[4] = 0 or 0
+ a[5] = 0 or 1
+ a[6] = 1 or 0
+ a[7] = 1 or 1
+ a[8] = 1 and 1 and 1 and 1
+ a[9] = 1 and 1 and 1 and 0
+ a[10] = 0 or 0 or 0 or 0
+ a[11] = 0 or 0 or 1 or 0
+
+ a = ti.field(ti.i32, shape=(12, ))
+ b = ti.field(ti.i32, shape=(12, ))
+
+ a[0] = 0 and 0
+ a[1] = 0 and 1
+ a[2] = 1 and 0
+ a[3] = 1 and 1
+ a[4] = 0 or 0
+ a[5] = 0 or 1
+ a[6] = 1 or 0
+ a[7] = 1 or 1
+ a[8] = 1 and 1 and 1 and 1
+ a[9] = 1 and 1 and 1 and 0
+ a[10] = 0 or 0 or 0 or 0
+ a[11] = 0 or 0 or 1 or 0
+
+ foo(b)
+
+ for i in range(12):
+ assert a[i] == b[i]
+
+
+@test_utils.test()
+def test_compare_fail():
+ with pytest.raises(ti.TaichiCompilationError,
+ match='"Is" is not supported in Taichi kernels.'):
+
+ @ti.kernel
+ def foo():
+ 1 is [1]
+
+ foo()
+
+
+@test_utils.test()
+def test_single_compare():
+ @ti.kernel
+ def foo(a: ti.template(), b: ti.template(), c: ti.template()):
+ for i in ti.static(range(3)):
+ c[i * 6] = a[i] == b[i]
+ c[i * 6 + 1] = a[i] != b[i]
+ c[i * 6 + 2] = a[i] < b[i]
+ c[i * 6 + 3] = a[i] <= b[i]
+ c[i * 6 + 4] = a[i] > b[i]
+ c[i * 6 + 5] = a[i] >= b[i]
+
+ a = ti.Vector([1, 1, 2])
+ b = ti.Vector([2, 1, 1])
+ c = ti.field(ti.i32, shape=(18, ))
+ d = ti.field(ti.i32, shape=(18, ))
+
+ for i in range(3):
+ c[i * 6] = a[i] == b[i]
+ c[i * 6 + 1] = a[i] != b[i]
+ c[i * 6 + 2] = a[i] < b[i]
+ c[i * 6 + 3] = a[i] <= b[i]
+ c[i * 6 + 4] = a[i] > b[i]
+ c[i * 6 + 5] = a[i] >= b[i]
+
+ foo(a, b, d)
+ for i in range(18):
+ assert c[i] == d[i]
+
+
+@test_utils.test()
+def test_chain_compare():
+ @ti.kernel
+ def foo(a: ti.i32, b: ti.i32, c: ti.template()):
+ c[0] = a == b == a
+ c[1] = a == b != a
+ c[2] = a != b == a
+ c[3] = a < b > a
+ c[4] = a > b < a
+ c[5] = a < b < a
+ c[6] = a > b > a
+ c[7] = a == a == a == a
+ c[8] = a == a == a != a
+ c[9] = a < b > a < b
+ c[10] = a > b > a < b
+
+ a = 1
+ b = 2
+ c = ti.field(ti.i32, shape=(11, ))
+ d = ti.field(ti.i32, shape=(11, ))
+
+ c[0] = a == b == a
+ c[1] = a == b != a
+ c[2] = a != b == a
+ c[3] = a < b > a
+ c[4] = a > b < a
+ c[5] = a < b < a
+ c[6] = a > b > a
+ c[7] = a == a == a == a
+ c[8] = a == a == a != a
+ c[9] = a < b > a < b
+ c[10] = a > b > a < b
+
+ foo(a, b, d)
+ for i in range(11):
+ assert c[i] == d[i]
+
+
+@test_utils.test()
+def test_return():
+ @ti.kernel
+ def foo(x: ti.i32) -> ti.i32:
+ return x + 1
+
+ assert foo(1) == 2
+
+
+@test_utils.test()
+def test_format_print():
+ a = ti.field(ti.i32, shape=(10, ))
+
+ @ti.kernel
+ def foo():
+ a[0] = 1.0
+ a[5] = 2.0
+ print('Test if the string.format and fstring print works')
+ print('string.format: a[0]={}, a[5]={}'.format(a[0], a[5]))
+ print(f'fstring: a[0]={a[0]}, a[5]={a[5]}')
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_if():
+ @ti.kernel
+ def foo(x: ti.i32) -> ti.i32:
+ ret = 0
+ if x:
+ ret = 1
+ else:
+ ret = 0
+ return ret
+
+ assert foo(1)
+ assert not foo(0)
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_if():
+ @ti.kernel
+ def foo(x: ti.template()) -> ti.i32:
+ ret = 0
+ if ti.static(x):
+ ret = 1
+ else:
+ ret = 0
+ return ret
+
+ assert foo(1)
+ assert not foo(0)
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_struct_for():
+ a = ti.field(ti.i32, shape=(10, ))
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for i in a:
+ a[i] = x
+
+ x = 5
+ foo(x)
+ for i in range(10):
+ assert a[i] == 5
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_grouped_struct_for():
+ a = ti.field(ti.i32, shape=(4, 4))
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for I in ti.grouped(a):
+ a[I] = x
+
+ x = 5
+ foo(x)
+ for i in range(4):
+ for j in range(4):
+ assert a[i, j] == 5
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_for():
+ a = ti.field(ti.i32, shape=(10, ))
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for i in ti.static(range(10)):
+ a[i] = x
+
+ x = 5
+ foo(x)
+ for i in range(10):
+ assert a[i] == 5
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_grouped_for():
+ a = ti.field(ti.i32, shape=(4, 4))
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for i in ti.static(ti.grouped(ti.ndrange((1, 3), (1, 3)))):
+ a[i] = x
+
+ x = 5
+ foo(x)
+ for i in range(4):
+ for j in range(4):
+ if 1 <= i < 3 and 1 <= j < 3:
+ assert a[i, j] == 5
+ else:
+ assert a[i, j] == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_range_for_single_argument():
+ a = ti.field(ti.i32, shape=(10, ))
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for i in range(5):
+ a[i] = x
+
+ x = 5
+ foo(x)
+ for i in range(10):
+ if i < 5:
+ assert a[i] == 5
+ else:
+ assert a[i] == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_range_for_two_arguments():
+ a = ti.field(ti.i32, shape=(10, ))
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for i in range(3, 7):
+ a[i] = x
+
+ x = 5
+ foo(x)
+ for i in range(10):
+ if 3 <= i < 7:
+ assert a[i] == 5
+ else:
+ assert a[i] == 0
+
+
+@test_utils.test()
+def test_range_for_three_arguments():
+ a = ti.field(ti.i32, shape=(10, ))
+
+ with pytest.raises(ti.TaichiCompilationError,
+ match='Range should have 1 or 2 arguments, found 3'):
+
+ @ti.kernel
+ def foo(x: ti.i32):
+ for i in range(3, 7, 2):
+ a[i] = x
+
+ x = 5
+ foo(x)
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_ndrange_for():
+ x = ti.field(ti.f32, shape=(16, 32, 64))
+
+ @ti.kernel
+ def func():
+ for i, j, k in ti.ndrange((4, 10), (3, 8), 17):
+ x[i, j, k] = i + j * 10 + k * 100
+
+ func()
+ for i in range(16):
+ for j in range(32):
+ for k in range(64):
+ if 4 <= i < 10 and 3 <= j < 8 and k < 17:
+ assert x[i, j, k] == i + j * 10 + k * 100
+ else:
+ assert x[i, j, k] == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_grouped_ndrange_for():
+ x = ti.field(ti.i32, shape=(6, 6, 6))
+ y = ti.field(ti.i32, shape=(6, 6, 6))
+
+ @ti.kernel
+ def func():
+ lower = ti.Vector([0, 1, 2])
+ upper = ti.Vector([3, 4, 5])
+ for I in ti.grouped(
+ ti.ndrange((lower[0], upper[0]), (lower[1], upper[1]),
+ (lower[2], upper[2]))):
+ x[I] = I[0] + I[1] + I[2]
+ for i in range(0, 3):
+ for j in range(1, 4):
+ for k in range(2, 5):
+ y[i, j, k] = i + j + k
+
+ func()
+
+ for i in range(6):
+ for j in range(6):
+ for k in range(6):
+ assert x[i, j, k] == y[i, j, k]
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_for_break():
+ n = 10
+
+ @ti.kernel
+ def foo(a: ti.template()):
+ for i in ti.static(range(n)):
+ a[i] = 3
+ if ti.static(i >= 5):
+ break
+ a[i] = 10
+ a[i] = 5
+
+ a = ti.field(ti.i32, shape=(n, ))
+ foo(a)
+ for i in range(n):
+ if i < 5:
+ assert a[i] == 5
+ elif i == 5:
+ assert a[i] == 3
+ else:
+ assert a[i] == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_grouped_for_break():
+ n = 4
+
+ @ti.kernel
+ def foo(a: ti.template()):
+ for I in ti.static(ti.grouped(ti.ndrange(n, n))):
+ a[I] = 3
+ if ti.static(I[0] >= 3):
+ break
+ a[I] = 10
+ a[I] = 5
+
+ a = ti.field(ti.i32, shape=(n, n))
+ foo(a)
+ for i in range(n):
+ for j in range(n):
+ if i < 3:
+ assert a[i, j] == 5
+ elif i == 3 and j == 0:
+ assert a[i, j] == 3
+ else:
+ assert a[i, j] == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_for_continue():
+ n = 10
+
+ @ti.kernel
+ def foo(a: ti.template()):
+ for i in ti.static(range(n)):
+ a[i] = 3
+ if ti.static(i >= 5):
+ continue
+ a[i] = 10
+ a[i] = 5
+
+ a = ti.field(ti.i32, shape=(n, ))
+ foo(a)
+ for i in range(n):
+ if i < 5:
+ assert a[i] == 5
+ else:
+ assert a[i] == 3
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_grouped_for_continue():
+ n = 4
+
+ @ti.kernel
+ def foo(a: ti.template()):
+ for I in ti.static(ti.grouped(ti.ndrange(n, n))):
+ a[I] = 3
+ if ti.static(I[0] >= 3):
+ continue
+ a[I] = 10
+ a[I] = 5
+
+ a = ti.field(ti.i32, shape=(n, n))
+ foo(a)
+ for i in range(n):
+ for j in range(n):
+ if i < 3:
+ assert a[i, j] == 5
+ else:
+ assert a[i, j] == 3
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_for_break():
+ n = 4
+
+ @ti.kernel
+ def foo(a: ti.template()):
+ for i in range(n):
+ for j in range(n):
+ a[i, j] = 3
+ if i >= 3:
+ break
+ a[i, j] = 10
+ a[i, j] = 5
+
+ a = ti.field(ti.i32, shape=(n, n))
+ foo(a)
+ for i in range(n):
+ for j in range(n):
+ if i < 3:
+ assert a[i, j] == 5
+ elif i == 3 and j == 0:
+ assert a[i, j] == 3
+ else:
+ assert a[i, j] == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_for_continue():
+ n = 4
+
+ @ti.kernel
+ def foo(a: ti.template()):
+ for i in range(n):
+ for j in range(n):
+ a[i, j] = 3
+ if i >= 3:
+ continue
+ a[i, j] = 10
+ a[i, j] = 5
+
+ a = ti.field(ti.i32, shape=(n, n))
+ foo(a)
+ for i in range(n):
+ for j in range(n):
+ if i < 3:
+ assert a[i, j] == 5
+ else:
+ assert a[i, j] == 3
+
+
+@test_utils.test()
+def test_while():
+ x = ti.field(ti.f32)
+
+ N = 1
+
+ ti.root.dense(ti.i, N).place(x)
+
+ @ti.kernel
+ def func():
+ i = 0
+ s = 0
+ while i < 10:
+ s += i
+ i += 1
+ x[0] = s
+
+ func()
+ assert x[0] == 45
+
+
+@test_utils.test()
+def test_while_break():
+ ret = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ i = 0
+ s = 0
+ while True:
+ s += i
+ i += 1
+ if i > 10:
+ break
+ ret[None] = s
+
+ func()
+ assert ret[None] == 55
+
+
+@test_utils.test()
+def test_while_continue():
+ ret = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ i = 0
+ s = 0
+ while i < 10:
+ i += 1
+ if i % 2 == 0:
+ continue
+ s += i
+ ret[None] = s
+
+ func()
+ assert ret[None] == 25
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_func():
+ @ti.func
+ def bar(x):
+ return x * x, -x
+
+ a = ti.field(ti.i32, shape=(10, ))
+ b = ti.field(ti.i32, shape=(10, ))
+
+ @ti.kernel
+ def foo():
+ for i in a:
+ a[i], b[i] = bar(i)
+
+ foo()
+ for i in range(10):
+ assert a[i] == i * i
+ assert b[i] == -i
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_func_in_python_func():
+ @ti.func
+ def bar(x: ti.template()):
+ if ti.static(x):
+ mat = bar(x // 2)
+ mat = mat @ mat
+ if ti.static(x % 2):
+ mat = mat @ ti.Matrix([[1, 1], [1, 0]])
+ return mat
+ else:
+ return ti.Matrix([[1, 0], [0, 1]])
+
+ def fibonacci(x):
+ return impl.subscript(bar(x), 1, 0)
+
+ @ti.kernel
+ def foo(x: ti.template()) -> ti.i32:
+ return fibonacci(x)
+
+ fib = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
+
+ for i in range(10):
+ assert foo(i) == fib[i]
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_ifexp():
+ @ti.kernel
+ def foo(x: ti.i32) -> ti.i32:
+ return 1 if x else 0
+
+ assert foo(1) == 1
+ assert foo(0) == 0
+
+
+@test_utils.test(print_preprocessed_ir=True)
+def test_static_ifexp():
+ @ti.kernel
+ def foo(x: ti.template()) -> ti.i32:
+ return 1 if ti.static(x) else 0
+
+ assert foo(1) == 1
+ assert foo(0) == 0
+
+
+@test_utils.test()
+def test_static_assign():
+ a = ti.field(ti.i32, shape=(1, ))
+ b = ti.field(ti.i32, shape=(1, ))
+
+ @ti.kernel
+ def foo(xx: ti.template(), yy: ti.template()) -> ti.i32:
+ x, y = ti.static(xx, yy)
+ x[0] -= 1
+ y[0] -= 1
+ return x[0] + y[0]
+
+ a[0] = 2
+ b[0] = 3
+ assert foo(a, b) == 3
+
+
+@test_utils.test()
+def test_static_assign_element():
+ with pytest.raises(
+ ti.TaichiCompilationError,
+ match='Static assign cannot be used on elements in arrays'):
+
+ @ti.kernel
+ def foo():
+ a = ti.static([1, 2, 3])
+ a[0] = ti.static(2)
+
+ foo()
+
+
+@test_utils.test()
+def test_recreate_variable():
+ with pytest.raises(ti.TaichiCompilationError,
+ match='Recreating variables is not allowed'):
+
+ @ti.kernel
+ def foo():
+ a = 1
+ a = ti.static(2)
+
+ foo()
+
+
+@test_utils.test()
+def test_taichi_other_than_ti():
+ import taichi as tc
+
+ @tc.func
+ def bar(x: tc.template()):
+ if tc.static(x):
+ mat = bar(x // 2)
+ mat = mat @ mat
+ if tc.static(x % 2):
+ mat = mat @ tc.Matrix([[1, 1], [1, 0]])
+ return mat
+ else:
+ return tc.Matrix([[1, 0], [0, 1]])
+
+ def fibonacci(x):
+ return impl.subscript(bar(x), 1, 0)
+
+ @tc.kernel
+ def foo(x: tc.template()) -> tc.i32:
+ return fibonacci(x)
+
+ fib = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
+
+ for i in range(10):
+ assert foo(i) == fib[i]
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_message():
+ @ti.kernel
+ def func():
+ x = 20
+ assert 10 <= x < 20, 'Foo bar'
+
+ with pytest.raises(RuntimeError, match='Foo bar'):
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_assert_message_formatted():
+ x = ti.field(dtype=int, shape=16)
+ x[10] = 42
+
+ @ti.kernel
+ def assert_formatted():
+ for i in x:
+ assert x[i] == 0, 'x[%d] expect=%d got=%d' % (i, 0, x[i])
+
+ @ti.kernel
+ def assert_float():
+ y = 0.5
+ assert y < 0, 'y = %f' % y
+
+ with pytest.raises(RuntimeError, match=r'x\[10\] expect=0 got=42'):
+ assert_formatted()
+ # TODO: note that we are not fully polished to be able to recover from
+ # assertion failures...
+ with pytest.raises(RuntimeError, match=r'y = 0.5'):
+ assert_float()
+
+ # success case
+ x[10] = 0
+ assert_formatted()
+
+
+@test_utils.test()
+def test_dict():
+ @ti.kernel
+ def foo(x: ti.template()) -> ti.i32:
+ a = {1: 2, 3: 4}
+ b = {5: 6, **a}
+ return b[x]
+
+ assert foo(1) == 2
+ with pytest.raises(ti.TaichiCompilationError):
+ foo(2)
+
+
+@test_utils.test()
+def test_listcomp():
+ @ti.func
+ def identity(dt, n: ti.template()):
+ return ti.Matrix([[ti.cast(int(i == j), dt) for j in range(n)]
+ for i in range(n)])
+
+ @ti.kernel
+ def foo(n: ti.template()) -> ti.i32:
+ a = identity(ti.i32, n)
+ b = [j for i in a for j in i]
+ ret = 0
+ for i in ti.static(range(n)):
+ for j in ti.static(range(n)):
+ ret += i * j * b[i * n + j]
+ return ret
+
+ assert foo(5) == 1 + 4 + 9 + 16
+
+
+@test_utils.test()
+def test_dictcomp():
+ @ti.kernel
+ def foo(n: ti.template()) -> ti.i32:
+ a = {i: i * i for i in range(n) if i % 3 if i % 2}
+ ret = 0
+ for i in ti.static(range(n)):
+ if ti.static(i % 3):
+ if ti.static(i % 2):
+ ret += a[i]
+ return ret
+
+ assert foo(10) == 1 * 1 + 5 * 5 + 7 * 7
+
+
+@test_utils.test()
+def test_dictcomp_fail():
+ @ti.kernel
+ def foo(n: ti.template(), m: ti.template()) -> ti.i32:
+ a = {i: i * i for i in range(n) if i % 3 if i % 2}
+ return a[m]
+
+ with pytest.raises(ti.TaichiCompilationError):
+ foo(5, 2)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ foo(5, 3)
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.opengl])
+def test_ndarray():
+ n = 4
+ m = 7
+
+ @ti.kernel
+ def run(x: ti.any_arr(element_dim=2, layout=ti.Layout.AOS),
+ y: ti.any_arr()):
+ for i in ti.static(range(n)):
+ for j in ti.static(range(m)):
+ x[i, j][0, 0] += i + j + y[i, j]
+
+ a = ti.Matrix.ndarray(1, 1, ti.i32, shape=(n, m))
+ for i in range(n):
+ for j in range(m):
+ a[i, j][0, 0] = i * j
+ b = np.ones((n, m), dtype=np.int32)
+ run(a, b)
+ for i in range(n):
+ for j in range(m):
+ assert a[i, j][0, 0] == i * j + i + j + 1
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_builder():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.static(ti.ndrange(n, n)):
+ Abuilder[i, j] += i + j
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ for i in range(n):
+ for j in range(n):
+ assert A[i, j] == i + j
+
+
+@test_utils.test()
+def test_func_default_value():
+ @ti.func
+ def bar(s, t=1):
+ return s + t
+
+ @ti.kernel
+ def foo() -> ti.i32:
+ return bar(1)
+
+ assert foo() == 2
+
+
+@test_utils.test()
+def test_func_default_value_fail():
+ with pytest.raises(ti.TaichiCompilationError):
+
+ @ti.func
+ def bar(s, t=1):
+ return s + t
+
+ @ti.kernel
+ def foo() -> ti.i32:
+ return bar(1, 2, 3)
+
+ foo()
+
+
+@test_utils.test()
+def test_raise():
+ dim = 1
+ m = ti.Matrix.field(dim, dim, ti.f32)
+ ti.root.place(m)
+
+ with pytest.raises(
+ ti.TaichiCompilationError,
+ match="Polar decomposition only supports 2D and 3D matrices."):
+
+ @ti.kernel
+ def foo():
+ ti.polar_decompose(m, ti.f32)
+
+ foo()
+
+
+@test_utils.test()
+def test_scalar_argument():
+ @ti.kernel
+ def add(a: ti.f32, b: ti.f32) -> ti.f32:
+ a = a + b
+ return a
+
+ assert add(1.0, 2.0) == test_utils.approx(3.0)
+
+
+@test_utils.test()
+def test_default_template_args_on_func():
+ @ti.func
+ def bar(a: ti.template() = 123):
+ return a
+
+ @ti.kernel
+ def foo() -> ti.i32:
+ return bar()
+
+ assert foo() == 123
+
+
+@test_utils.test()
+def test_grouped_static_for_cast():
+ @ti.kernel
+ def foo() -> ti.f32:
+ ret = 0.
+ for I in ti.static(ti.grouped(ti.ndrange((4, 5), (3, 5), 5))):
+ tmp = I.cast(float)
+ ret += tmp[2] / 2
+ return ret
+
+ assert foo() == test_utils.approx(10)
diff --git a/tests/_python_orig/test_ast_resolver.py b/tests/_python_orig/test_ast_resolver.py
new file mode 100644
index 000000000..47ccd8bc3
--- /dev/null
+++ b/tests/_python_orig/test_ast_resolver.py
@@ -0,0 +1,47 @@
+import ast
+from collections import namedtuple
+
+from taichi.lang.ast.symbol_resolver import ASTResolver
+
+
+def test_ast_resolver_basic():
+ # import within the function to avoid polluting the global scope
+ import taichi as ti
+ ti.init()
+ node = ast.parse('ti.kernel', mode='eval').body
+ assert ASTResolver.resolve_to(node, ti.kernel, locals())
+
+
+def test_ast_resolver_direct_import():
+ import taichi as ti
+ ti.init()
+ from taichi import kernel
+ node = ast.parse('kernel', mode='eval').body
+ assert ASTResolver.resolve_to(node, kernel, locals())
+
+
+def test_ast_resolver_alias():
+ import taichi
+ taichi.init()
+ node = ast.parse('taichi.kernel', mode='eval').body
+ assert ASTResolver.resolve_to(node, taichi.kernel, locals())
+
+ import taichi as tc
+ node = ast.parse('tc.kernel', mode='eval').body
+ assert ASTResolver.resolve_to(node, tc.kernel, locals())
+
+
+def test_ast_resolver_chain():
+ import taichi as ti
+ ti.init()
+ node = ast.parse('ti.lang.ops.atomic_add', mode='eval').body
+ assert ASTResolver.resolve_to(node, ti.atomic_add, locals())
+
+
+def test_ast_resolver_wrong_ti():
+ import taichi
+ taichi.init()
+ fake_ti = namedtuple('FakeTi', ['kernel'])
+ ti = fake_ti(kernel='fake')
+ node = ast.parse('ti.kernel', mode='eval').body
+ assert not ASTResolver.resolve_to(node, taichi.kernel, locals())
diff --git a/tests/_python_orig/test_async.py b/tests/_python_orig/test_async.py
new file mode 100644
index 000000000..18f5c5d56
--- /dev/null
+++ b/tests/_python_orig/test_async.py
@@ -0,0 +1,57 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_simple():
+ n = 32
+
+ x = ti.field(dtype=ti.i32, shape=n)
+
+ @ti.kernel
+ def double():
+ for i in x:
+ x[i] = i * 2
+
+ double()
+
+ for i in range(n):
+ assert x[i] == i * 2
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_numpy():
+ n = 10000
+
+ @ti.kernel
+ def inc(a: ti.ext_arr()):
+ for i in range(n):
+ a[i] += i
+
+ x = np.zeros(dtype=np.int32, shape=n)
+ for i in range(10):
+ inc(x)
+
+ for i in range(n):
+ assert x[i] == i * 10
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_listgen_opt_with_offsets():
+ x = ti.field(dtype=ti.i32)
+
+ ti.root.pointer(ti.i, 4).dense(ti.i, 4).place(x, offset=-8)
+
+ @ti.kernel
+ def inc():
+ for i in x:
+ x[i] += 1
+
+ for i in range(10):
+ inc()
+
+ ti.sync()
+ assert ti.tools.async_utils.get_kernel_stats().get_counters(
+ )['launched_tasks_list_gen'] <= 2
diff --git a/tests/_python_orig/test_atomic.py b/tests/_python_orig/test_atomic.py
new file mode 100644
index 000000000..73896fcfa
--- /dev/null
+++ b/tests/_python_orig/test_atomic.py
@@ -0,0 +1,337 @@
+import taichi as ti
+from tests import test_utils
+
+n = 128
+
+
+def run_atomic_add_global_case(vartype, step, valproc=lambda x: x):
+ x = ti.field(vartype)
+ y = ti.field(vartype)
+ c = ti.field(vartype)
+
+ ti.root.dense(ti.i, n).place(x, y)
+ ti.root.place(c)
+
+ # Make Taichi correctly infer the type
+ # TODO: Taichi seems to treat numpy.int32 as a float type, fix that.
+ init_ck = 0 if vartype == ti.i32 else 0.0
+
+ @ti.kernel
+ def func():
+ ck = init_ck
+ for i in range(n):
+ x[i] = ti.atomic_add(c[None], step)
+ y[i] = ti.atomic_add(ck, step)
+
+ func()
+
+ assert valproc(c[None]) == n * step
+ x_actual = sorted(x.to_numpy())
+ y_actual = sorted(y.to_numpy())
+ expect = [i * step for i in range(n)]
+ for (xa, ya, e) in zip(x_actual, y_actual, expect):
+ print(xa, ya, e)
+ assert valproc(xa) == e
+ assert valproc(ya) == e
+
+
+@test_utils.test()
+def test_atomic_add_global_i32():
+ run_atomic_add_global_case(ti.i32, 42)
+
+
+@test_utils.test()
+def test_atomic_add_global_f32():
+ run_atomic_add_global_case(
+ ti.f32, 4.2, valproc=lambda x: test_utils.approx(x, rel=1e-5))
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_atomic_min_max_uint():
+ x = ti.field(ti.u64, shape=100)
+
+ @ti.kernel
+ def test0():
+ for I in x:
+ x[I] = 0
+ x[1] = ti.cast(1, ti.u64) << 63
+ for I in x:
+ ti.atomic_max(x[0], x[I])
+
+ test0()
+ assert x[0] == 9223372036854775808
+
+ @ti.kernel
+ def test1():
+ for I in x:
+ x[I] = ti.cast(1, ti.u64) << 63
+ x[1] = 100
+ for I in x:
+ ti.atomic_min(x[0], x[I])
+
+ test1()
+ assert x[0] == 100
+
+
+@test_utils.test()
+def test_atomic_add_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_add(c[None], step)
+
+ func()
+
+ assert c[None] == n * step
+
+
+@test_utils.test()
+def test_atomic_add_demoted():
+ # Ensure demoted atomics do not crash the program.
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ step = 42
+
+ ti.root.dense(ti.i, n).place(x, y)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ s = i
+ # Both adds should get demoted.
+ x[i] = ti.atomic_add(s, step)
+ y[i] = ti.atomic_add(s, step)
+
+ func()
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i + step
+
+
+@test_utils.test()
+def test_atomic_add_with_local_store_simplify1():
+ # Test for the following LocalStoreStmt simplification case:
+ #
+ # local store [$a <- ...]
+ # atomic add ($a, ...)
+ # local store [$a <- ...]
+ #
+ # Specifically, the second store should not suppress the first one, because
+ # atomic_add can return value.
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ step = 42
+
+ ti.root.dense(ti.i, n).place(x, y)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ # do a local store
+ j = i
+ x[i] = ti.atomic_add(j, step)
+ # do another local store, make sure the previous one is not optimized out
+ j = x[i]
+ y[i] = j
+
+ func()
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i
+
+
+@test_utils.test()
+def test_atomic_add_with_local_store_simplify2():
+ # Test for the following LocalStoreStmt simplification case:
+ #
+ # local store [$a <- ...]
+ # atomic add ($a, ...)
+ #
+ # Specifically, the local store should not be removed, because
+ # atomic_add can return its value.
+ x = ti.field(ti.i32)
+ step = 42
+
+ ti.root.dense(ti.i, n).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ j = i
+ x[i] = ti.atomic_add(j, step)
+
+ func()
+
+ for i in range(n):
+ assert x[i] == i
+
+
+@test_utils.test()
+def test_atomic_add_with_if_simplify():
+ # Make sure IfStmt simplification doesn't move stmts depending on the result
+ # of atomic_add()
+ x = ti.field(ti.i32)
+ step = 42
+
+ ti.root.dense(ti.i, n).place(x)
+
+ boundary = n / 2
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ if i > boundary:
+ # A sequence of commands designed such that atomic_add() is the only
+ # thing to decide whether the if branch can be simplified.
+ s = i
+ j = ti.atomic_add(s, s)
+ k = j + s
+ x[i] = k
+ else:
+ # If we look at the IR, this branch should be simplified, since nobody
+ # is using atomic_add's result.
+ ti.atomic_add(x[i], i)
+ x[i] += step
+
+ func()
+
+ for i in range(n):
+ expect = i * 3 if i > boundary else (i + step)
+ assert x[i] == expect
+
+
+@test_utils.test()
+def test_local_atomic_with_if():
+ ret = ti.field(dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def test():
+ if True:
+ x = 0
+ x += 1
+ ret[None] = x
+
+ test()
+ assert ret[None] == 1
+
+
+@test_utils.test()
+def test_atomic_sub_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_sub(c[None], step)
+
+ func()
+
+ assert c[None] == -n * step
+
+
+@test_utils.test()
+def test_atomic_max_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_max(c[None], i * step)
+
+ func()
+
+ assert c[None] == (n - 1) * step
+
+
+@test_utils.test()
+def test_atomic_min_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ c[None] = 1000
+ for i in range(n):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_min(c[None], i * step)
+
+ func()
+
+ assert c[None] == 0
+
+
+@test_utils.test()
+def test_atomic_and_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ max_int = 2147483647
+
+ @ti.kernel
+ def func():
+ c[None] = 1023
+ for i in range(10):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_and(c[None], max_int - 2**i)
+
+ func()
+
+ assert c[None] == 0
+
+
+@test_utils.test()
+def test_atomic_or_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ c[None] = 0
+ for i in range(10):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_or(c[None], 2**i)
+
+ func()
+
+ assert c[None] == 1023
+
+
+@test_utils.test()
+def test_atomic_xor_expr_evaled():
+ c = ti.field(ti.i32)
+ step = 42
+
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ c[None] = 1023
+ for i in range(10):
+ # this is an expr with side effect, make sure it's not optimized out.
+ ti.atomic_xor(c[None], 2**i)
+
+ func()
+
+ assert c[None] == 0
diff --git a/tests/_python_orig/test_basics.py b/tests/_python_orig/test_basics.py
new file mode 100644
index 000000000..cd452526c
--- /dev/null
+++ b/tests/_python_orig/test_basics.py
@@ -0,0 +1,128 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_simple():
+ n = 128
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def func():
+ x[7] = 120
+
+ func()
+
+ for i in range(n):
+ if i == 7:
+ assert x[i] == 120
+ else:
+ assert x[i] == 0
+
+
+@test_utils.test()
+def test_range_loops():
+ n = 128
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ x[i] = i + 123
+
+ func()
+
+ for i in range(n):
+ assert x[i] == i + 123
+
+
+@test_utils.test()
+def test_python_access():
+ n = 128
+ x = ti.field(ti.i32, shape=n)
+
+ x[3] = 123
+ x[4] = 456
+ assert x[3] == 123
+ assert x[4] == 456
+
+
+@test_utils.test()
+def test_if():
+ x = ti.field(ti.f32, shape=16)
+
+ @ti.kernel
+ def if_test():
+ for i in x:
+ if i < 100:
+ x[i] = 100
+ else:
+ x[i] = i
+
+ if_test()
+
+ for i in range(16):
+ assert x[i] == 100
+
+ @ti.kernel
+ def if_test2():
+ for i in x:
+ if i < 100:
+ x[i] = i
+ else:
+ x[i] = 100
+
+ if_test2()
+
+ for i in range(16):
+ assert x[i] == i
+
+
+@test_utils.test()
+def test_if_global_load():
+ x = ti.field(ti.i32, shape=16)
+
+ @ti.kernel
+ def fill():
+ for i in x:
+ if x[i]:
+ x[i] = i
+
+ for i in range(16):
+ x[i] = i % 2
+
+ fill()
+
+ for i in range(16):
+ if i % 2 == 0:
+ assert x[i] == 0
+ else:
+ assert x[i] == i
+
+
+@test_utils.test()
+def test_while_global_load():
+ x = ti.field(ti.i32, shape=16)
+ y = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def run():
+ while x[3]:
+ x[3] -= 1
+ y[None] += 1
+
+ for i in range(16):
+ x[i] = i
+
+ run()
+
+ assert y[None] == 3
+
+
+@test_utils.test()
+def test_datatype_string():
+ for ty in [
+ ti.u8, ti.u16, ti.u32, ti.u64, ti.i8, ti.i16, ti.i32, ti.f32,
+ ti.f64
+ ]:
+ assert ty.to_string() == str(ty)
diff --git a/tests/_python_orig/test_binding.py b/tests/_python_orig/test_binding.py
new file mode 100644
index 000000000..6a7b197d2
--- /dev/null
+++ b/tests/_python_orig/test_binding.py
@@ -0,0 +1,13 @@
+import taichi as ti
+
+
+def test_binding():
+ ti.init()
+ taichi_lang = ti._lib.core
+ print(taichi_lang.BinaryOpType.mul)
+ one = taichi_lang.make_const_expr_int(ti.i32, 1)
+ two = taichi_lang.make_const_expr_int(ti.i32, 2)
+ expr = taichi_lang.make_binary_op_expr(taichi_lang.BinaryOpType.add, one,
+ two)
+ print(expr.serialize())
+ print(taichi_lang.make_global_store_stmt(None, None))
diff --git a/tests/_python_orig/test_bit_array.py b/tests/_python_orig/test_bit_array.py
new file mode 100644
index 000000000..d5426ab3c
--- /dev/null
+++ b/tests/_python_orig/test_bit_array.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.quant, debug=True)
+def test_1D_bit_array():
+ cu1 = ti.types.quantized_types.quant.int(1, False)
+
+ x = ti.field(dtype=cu1)
+
+ N = 32
+
+ ti.root.bit_array(ti.i, N, num_bits=32).place(x)
+
+ @ti.kernel
+ def set_val():
+ for i in range(N):
+ x[i] = i % 2
+
+ @ti.kernel
+ def verify_val():
+ for i in range(N):
+ assert x[i] == i % 2
+
+ set_val()
+ verify_val()
+
+
+@test_utils.test(require=ti.extension.quant, debug=True)
+def test_2D_bit_array():
+ ci1 = ti.types.quantized_types.quant.int(1, False)
+
+ x = ti.field(dtype=ci1)
+
+ M, N = 4, 8
+
+ ti.root.bit_array(ti.ij, (M, N), num_bits=32).place(x)
+
+ @ti.kernel
+ def set_val():
+ for i in range(M):
+ for j in range(N):
+ x[i, j] = (i * N + j) % 2
+
+ @ti.kernel
+ def verify_val():
+ for i in range(M):
+ for j in range(N):
+ assert x[i, j] == (i * N + j) % 2
+
+ set_val()
+ verify_val()
diff --git a/tests/_python_orig/test_bit_array_vectorization.py b/tests/_python_orig/test_bit_array_vectorization.py
new file mode 100644
index 000000000..02afb38ec
--- /dev/null
+++ b/tests/_python_orig/test_bit_array_vectorization.py
@@ -0,0 +1,179 @@
+from taichi.lang.impl import get_runtime
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.quant,
+ debug=True,
+ cfg_optimization=False)
+def test_vectorized_struct_for():
+ cu1 = ti.types.quantized_types.quant.int(1, False)
+
+ x = ti.field(dtype=cu1)
+ y = ti.field(dtype=cu1)
+
+ N = 4096
+ n_blocks = 4
+ bits = 32
+ boundary_offset = 1024
+
+ block = ti.root.pointer(ti.ij, (n_blocks, n_blocks))
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(x)
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(y)
+
+ @ti.kernel
+ def init():
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ x[i, j] = ti.random(dtype=ti.i32) % 2
+
+ @ti.kernel
+ def assign_vectorized():
+ get_runtime().prog.current_ast_builder().bit_vectorize(32)
+ for i, j in x:
+ y[i, j] = x[i, j]
+
+ @ti.kernel
+ def verify():
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ assert y[i, j] == x[i, j]
+
+ init()
+ assign_vectorized()
+ verify()
+
+
+@test_utils.test(require=ti.extension.quant)
+def test_offset_load():
+ ci1 = ti.types.quantized_types.quant.int(1, False)
+
+ x = ti.field(dtype=ci1)
+ y = ti.field(dtype=ci1)
+ z = ti.field(dtype=ci1)
+
+ N = 4096
+ n_blocks = 4
+ bits = 32
+ boundary_offset = 1024
+ assert boundary_offset >= N // n_blocks
+
+ block = ti.root.pointer(ti.ij, (n_blocks, n_blocks))
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(x)
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(y)
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(z)
+
+ @ti.kernel
+ def init():
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ x[i, j] = ti.random(dtype=ti.i32) % 2
+
+ @ti.kernel
+ def assign_vectorized(dx: ti.template(), dy: ti.template()):
+ get_runtime().prog.current_ast_builder().bit_vectorize(32)
+ for i, j in x:
+ y[i, j] = x[i + dx, j + dy]
+ z[i, j] = x[i + dx, j + dy]
+
+ @ti.kernel
+ def verify(dx: ti.template(), dy: ti.template()):
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ assert y[i, j] == x[i + dx, j + dy]
+
+ init()
+ assign_vectorized(0, 1)
+ verify(0, 1)
+ assign_vectorized(1, 0)
+ verify(1, 0)
+ assign_vectorized(0, -1)
+ verify(0, -1)
+ assign_vectorized(-1, 0)
+ verify(-1, 0)
+ assign_vectorized(1, 1)
+ verify(1, 1)
+ assign_vectorized(1, -1)
+ verify(1, -1)
+ assign_vectorized(-1, -1)
+ verify(-1, -1)
+ assign_vectorized(-1, 1)
+ verify(-1, 1)
+
+
+@test_utils.test(require=ti.extension.quant, debug=True)
+def test_evolve():
+ ci1 = ti.types.quantized_types.quant.int(1, False)
+
+ x = ti.field(dtype=ci1)
+ y = ti.field(dtype=ci1)
+ z = ti.field(dtype=ci1)
+
+ N = 4096
+ n_blocks = 4
+ bits = 32
+ boundary_offset = 1024
+ assert boundary_offset >= N // n_blocks
+
+ block = ti.root.pointer(ti.ij, (n_blocks, n_blocks))
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(x)
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(y)
+ block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks))).bit_array(
+ ti.j, bits, num_bits=bits).place(z)
+
+ @ti.kernel
+ def init():
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ x[i, j] = ti.random(dtype=ti.i32) % 2
+
+ @ti.kernel
+ def evolve_vectorized(x: ti.template(), y: ti.template()):
+ get_runtime().prog.current_ast_builder().bit_vectorize(32)
+ for i, j in x:
+ num_active_neighbors = 0
+ num_active_neighbors += ti.cast(x[i - 1, j - 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i - 1, j], ti.u32)
+ num_active_neighbors += ti.cast(x[i - 1, j + 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i, j - 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i, j + 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i + 1, j - 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i + 1, j], ti.u32)
+ num_active_neighbors += ti.cast(x[i + 1, j + 1], ti.u32)
+ y[i, j] = (num_active_neighbors == 3) | \
+ ((num_active_neighbors == 2) & (x[i, j] == 1))
+
+ @ti.kernel
+ def evolve_naive(x: ti.template(), y: ti.template()):
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ num_active_neighbors = 0
+ num_active_neighbors += ti.cast(x[i - 1, j - 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i - 1, j], ti.u32)
+ num_active_neighbors += ti.cast(x[i - 1, j + 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i, j - 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i, j + 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i + 1, j - 1], ti.u32)
+ num_active_neighbors += ti.cast(x[i + 1, j], ti.u32)
+ num_active_neighbors += ti.cast(x[i + 1, j + 1], ti.u32)
+ y[i, j] = (num_active_neighbors == 3) or (num_active_neighbors == 2
+ and x[i, j] == 1)
+
+ @ti.kernel
+ def verify():
+ for i, j in ti.ndrange((boundary_offset, N - boundary_offset),
+ (boundary_offset, N - boundary_offset)):
+ assert y[i, j] == z[i, j]
+
+ init()
+ evolve_naive(x, z)
+ evolve_vectorized(x, y)
+ verify()
diff --git a/tests/_python_orig/test_bit_operations.py b/tests/_python_orig/test_bit_operations.py
new file mode 100644
index 000000000..5c11194dd
--- /dev/null
+++ b/tests/_python_orig/test_bit_operations.py
@@ -0,0 +1,49 @@
+import operator as ops
+
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_bit_shl():
+ @ti.kernel
+ def shl(a: ti.i32, b: ti.i32) -> ti.i32:
+ return a << b
+
+ for i in range(8):
+ assert shl(3, i) == 3 * 2**i
+
+
+@test_utils.test()
+def test_bit_sar():
+ @ti.kernel
+ def sar(a: ti.i32, b: ti.i32) -> ti.i32:
+ return a >> b
+
+ n = 8
+ test_num = 2**n
+ neg_test_num = -test_num
+ for i in range(n):
+ assert sar(test_num, i) == 2**(n - i)
+ # for negative number
+ for i in range(n):
+ assert sar(neg_test_num, i) == -2**(n - i)
+
+
+@test_utils.test()
+def test_bit_shr():
+ @ti.kernel
+ def shr(a: ti.i32, b: ti.i32) -> ti.i32:
+ return ti.bit_shr(a, b)
+
+ n = 8
+ test_num = 2**n
+ neg_test_num = -test_num
+ for i in range(n):
+ assert shr(test_num, i) == 2**(n - i)
+ for i in range(n):
+ offset = 0x100000000 if i > 0 else 0
+ assert shr(neg_test_num, i) == (neg_test_num + offset) >> i
diff --git a/tests/_python_orig/test_bit_struct.py b/tests/_python_orig/test_bit_struct.py
new file mode 100644
index 000000000..dde979230
--- /dev/null
+++ b/tests/_python_orig/test_bit_struct.py
@@ -0,0 +1,174 @@
+import numpy as np
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.quant_basic, debug=True)
+def test_simple_array():
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+ cu19 = ti.types.quantized_types.quant.int(19, False)
+
+ x = ti.field(dtype=ci13)
+ y = ti.field(dtype=cu19)
+
+ N = 12
+
+ ti.root.dense(ti.i, N).bit_struct(num_bits=32).place(x, y)
+
+ @ti.kernel
+ def set_val():
+ for i in range(N):
+ x[i] = -2**i
+ y[i] = 2**i - 1
+
+ @ti.kernel
+ def verify_val():
+ for i in range(N):
+ assert x[i] == -2**i
+ assert y[i] == 2**i - 1
+
+ set_val()
+ verify_val()
+
+ # Test bit_struct SNode read and write in Python-scope by calling the wrapped, untranslated function body
+ set_val.__wrapped__()
+ verify_val.__wrapped__()
+
+
+# TODO: remove excluding of ti.metal
+@test_utils.test(require=ti.extension.quant_basic,
+ exclude=[ti.metal],
+ debug=True)
+def test_custom_int_load_and_store():
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+ cu14 = ti.types.quantized_types.quant.int(14, False)
+ ci5 = ti.types.quantized_types.quant.int(5, True)
+
+ x = ti.field(dtype=ci13)
+ y = ti.field(dtype=cu14)
+ z = ti.field(dtype=ci5)
+
+ test_case_np = np.array(
+ [[2**12 - 1, 2**14 - 1, -(2**3)], [2**11 - 1, 2**13 - 1, -(2**2)],
+ [0, 0, 0], [123, 4567, 8], [10, 31, 11]],
+ dtype=np.int32)
+
+ ti.root.bit_struct(num_bits=32).place(x, y, z)
+ test_case = ti.Vector.field(3, dtype=ti.i32, shape=len(test_case_np))
+ test_case.from_numpy(test_case_np)
+
+ @ti.kernel
+ def set_val(idx: ti.i32):
+ x[None] = test_case[idx][0]
+ y[None] = test_case[idx][1]
+ z[None] = test_case[idx][2]
+
+ @ti.kernel
+ def verify_val(idx: ti.i32):
+ assert x[None] == test_case[idx][0]
+ assert y[None] == test_case[idx][1]
+ assert z[None] == test_case[idx][2]
+
+ for idx in range(len(test_case_np)):
+ set_val(idx)
+ verify_val(idx)
+
+ # Test bit_struct SNode read and write in Python-scope by calling the wrapped, untranslated function body
+ for idx in range(len(test_case_np)):
+ set_val.__wrapped__(idx)
+ verify_val.__wrapped__(idx)
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_custom_int_full_struct():
+ cit = ti.types.quantized_types.quant.int(32, True)
+ x = ti.field(dtype=cit)
+ ti.root.dense(ti.i, 1).bit_struct(num_bits=32).place(x)
+
+ x[0] = 15
+ assert x[0] == 15
+
+ x[0] = 12
+ assert x[0] == 12
+
+
+def test_bit_struct():
+ def test_single_bit_struct(physical_type, compute_type, custom_bits,
+ test_case):
+ ti.init(arch=ti.cpu, debug=True)
+
+ cit1 = ti.types.quantized_types.quant.int(custom_bits[0], True,
+ compute_type)
+ cit2 = ti.types.quantized_types.quant.int(custom_bits[1], False,
+ compute_type)
+ cit3 = ti.types.quantized_types.quant.int(custom_bits[2], True,
+ compute_type)
+
+ a = ti.field(dtype=cit1)
+ b = ti.field(dtype=cit2)
+ c = ti.field(dtype=cit3)
+ ti.root.bit_struct(num_bits=physical_type).place(a, b, c)
+
+ @ti.kernel
+ def set_val(test_val: ti.ext_arr()):
+ a[None] = test_val[0]
+ b[None] = test_val[1]
+ c[None] = test_val[2]
+
+ @ti.kernel
+ def verify_val(test_val: ti.ext_arr()):
+ assert a[None] == test_val[0]
+ assert b[None] == test_val[1]
+ assert c[None] == test_val[2]
+
+ set_val(test_case)
+ verify_val(test_case)
+
+ ti.reset()
+
+ test_single_bit_struct(8, ti.i8, [3, 3, 2],
+ np.array([2**2 - 1, 2**3 - 1, -2**1]))
+ test_single_bit_struct(16, ti.i16, [4, 7, 5],
+ np.array([2**3 - 1, 2**7 - 1, -2**4]))
+ test_single_bit_struct(32, ti.i32, [17, 11, 4],
+ np.array([2**16 - 1, 2**10 - 1, -2**3]))
+ test_single_bit_struct(64, ti.i64, [32, 23, 9],
+ np.array([2**31 - 1, 2**23 - 1, -2**8]))
+ test_single_bit_struct(32, ti.i16, [7, 12, 13],
+ np.array([2**6 - 1, 2**12 - 1, -2**12]))
+ test_single_bit_struct(64, ti.i32, [18, 22, 24],
+ np.array([2**17 - 1, 2**22 - 1, -2**23]))
+
+ test_single_bit_struct(16, ti.i16, [5, 5, 6], np.array([15, 5, 20]))
+ test_single_bit_struct(32, ti.i32, [10, 10, 12], np.array([11, 19, 2020]))
+
+
+@test_utils.test(require=[ti.extension.quant_basic, ti.extension.sparse],
+ debug=True)
+def test_bit_struct_struct_for():
+ block_size = 16
+ N = 64
+ cell = ti.root.pointer(ti.i, N // block_size)
+ fixed32 = ti.types.quantized_types.quant.fixed(frac=32, num_range=1024)
+
+ x = ti.field(dtype=fixed32)
+ cell.dense(ti.i, block_size).bit_struct(32).place(x)
+
+ for i in range(N):
+ if i // block_size % 2 == 0:
+ x[i] = 0
+
+ @ti.kernel
+ def assign():
+ for i in x:
+ x[i] = ti.cast(i, float)
+
+ assign()
+
+ for i in range(N):
+ if i // block_size % 2 == 0:
+ assert x[i] == approx(i, abs=1e-3)
+ else:
+ assert x[i] == 0
diff --git a/tests/_python_orig/test_bitmasked.py b/tests/_python_orig/test_bitmasked.py
new file mode 100644
index 000000000..e31b4eb71
--- /dev/null
+++ b/tests/_python_orig/test_bitmasked.py
@@ -0,0 +1,315 @@
+import taichi as ti
+from tests import test_utils
+
+
+def _test_basic():
+ x = ti.field(ti.i32)
+ c = ti.field(ti.i32)
+ s = ti.field(ti.i32)
+
+ bm = ti.root.bitmasked(ti.ij, (3, 6)).bitmasked(ti.i, 5)
+ bm.place(x)
+ ti.root.place(c, s)
+
+ @ti.kernel
+ def run():
+ x[5, 1] = 2
+ x[9, 4] = 20
+ x[0, 3] = 20
+
+ @ti.kernel
+ def sum():
+ for i, j in x:
+ c[None] += ti.is_active(bm, [i, j])
+ s[None] += x[i, j]
+
+ run()
+ sum()
+
+ assert c[None] == 3
+ assert s[None] == 42
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_basic():
+ _test_basic()
+
+
+@test_utils.test(require=[ti.extension.sparse, ti.extension.packed],
+ packed=True)
+def test_basic_packed():
+ _test_basic()
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_bitmasked_then_dense():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.bitmasked(ti.i, n).dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ x[0] = 1
+ x[127] = 1
+ x[256] = 1
+ x[257] = 1
+
+ func()
+ assert s[None] == 256
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_bitmasked_bitmasked():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.bitmasked(ti.i, n).bitmasked(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ x[0] = 1
+ x[127] = 1
+ x[256] = 1
+ x[257] = 1
+
+ func()
+ assert s[None] == 4
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_huge_bitmasked():
+ # Mainly for testing Metal listgen's grid-stride loop implementation.
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 1024
+
+ ti.root.bitmasked(ti.i, n).bitmasked(ti.i, 2 * n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def func():
+ for i in range(n * n * 2):
+ if i % 32 == 0:
+ x[i] = 1.0
+
+ @ti.kernel
+ def count():
+ for i in x:
+ s[None] += 1
+
+ func()
+ count()
+ assert s[None] == (n * n * 2) // 32
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_bitmasked_listgen_bounded():
+    # Mainly for testing that Metal's listgen is bounded by the actual number of
+ # elements possible for that SNode. Note that 1) SNode's size is padded
+ # to POT, and 2) Metal ListManager's data size is not padded, we need to
+ # make sure listgen doesn't go beyond ListManager's capacity.
+ x = ti.field(ti.i32)
+ c = ti.field(ti.i32)
+
+    # A prime that is a bit higher than 65536, which is Metal's maximum number of
+ # threads for listgen.
+ n = 80173
+
+ ti.root.dense(ti.i, n).bitmasked(ti.i, 1).place(x)
+ ti.root.place(c)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ x[i] = 1
+
+ @ti.kernel
+ def count():
+ for i in x:
+ c[None] += 1
+
+ func()
+ count()
+ assert c[None] == n
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_deactivate():
+ # https://github.com/taichi-dev/taichi/issues/778
+ a = ti.field(ti.i32)
+ a_a = ti.root.bitmasked(ti.i, 4)
+ a_b = a_a.dense(ti.i, 4)
+ a_b.place(a)
+ c = ti.field(ti.i32)
+ ti.root.place(c)
+
+ @ti.kernel
+ def run():
+ a[0] = 123
+
+ @ti.kernel
+ def is_active():
+ c[None] = ti.is_active(a_a, [0])
+
+ @ti.kernel
+ def deactivate():
+ ti.deactivate(a_a, [0])
+
+ run()
+ is_active()
+ assert c[None] == 1
+
+ deactivate()
+ is_active()
+ assert c[None] == 0
+
+
+def _test_sparsity_changes():
+ x = ti.field(ti.i32)
+ c = ti.field(ti.i32)
+ s = ti.field(ti.i32)
+
+ bm = ti.root.bitmasked(ti.i, 5).bitmasked(ti.i, 3)
+ bm.place(x)
+ ti.root.place(c, s)
+
+ @ti.kernel
+ def run():
+ for i in x:
+ s[None] += x[i]
+ c[None] += 1
+
+ # Only two elements of |x| are activated
+ x[1] = 2
+ x[8] = 20
+ run()
+ assert c[None] == 2
+ assert s[None] == 22
+
+ c[None] = 0
+ s[None] = 0
+ # Four elements are activated now
+ x[7] = 15
+ x[14] = 5
+
+ run()
+ assert c[None] == 4
+ assert s[None] == 42
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_sparsity_changes():
+ _test_sparsity_changes()
+
+
+@test_utils.test(require=[ti.extension.sparse, ti.extension.packed],
+ packed=True)
+def test_sparsity_changes_packed():
+ _test_sparsity_changes()
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_bitmasked_offset_child():
+ x = ti.field(ti.i32)
+ x2 = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ y2 = ti.field(ti.i32)
+ y3 = ti.field(ti.i32)
+ z = ti.field(ti.i32)
+ s = ti.field(ti.i32, shape=())
+
+ n = 16
+ # Offset children:
+ # * In |bm|'s cell: |bm2| has a non-zero offset
+ # * In |bm2|'s cell: |z| has a non-zero offset
+ # * We iterate over |z| to test the listgen handles offsets correctly
+ bm = ti.root.bitmasked(ti.i, n)
+ bm.dense(ti.i, 16).place(x, x2)
+ bm2 = bm.bitmasked(ti.i, 4)
+
+ bm2.dense(ti.i, 4).place(y, y2, y3)
+ bm2.bitmasked(ti.i, 4).place(z)
+
+ @ti.kernel
+ def func():
+ for _ in z:
+ s[None] += 1
+
+ z[0] = 1
+ z[7] = 1
+ z[42] = 1
+ z[53] = 1
+ z[88] = 1
+ z[101] = 1
+ z[233] = 1
+
+ func()
+ assert s[None] == 7
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_bitmasked_2d_power_of_two():
+ some_val = ti.field(dtype=float)
+ width, height = 10, 10
+ total = width * height
+ ptr = ti.root.bitmasked(ti.ij, (width, height))
+ ptr.place(some_val)
+ num_active = ti.field(dtype=int, shape=())
+
+ @ti.kernel
+ def init():
+ num_active[None] = 0
+ for x, y in ti.ndrange(width, height):
+ some_val[x, y] = 5
+ num_active[None] += 1
+
+ @ti.kernel
+ def run():
+ num_active[None] = 0
+ for x, y in some_val:
+ num_active[None] += 1
+
+ init()
+ assert num_active[None] == total
+ run()
+ assert num_active[None] == total
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_root_deactivate():
+ a = ti.field(ti.i32)
+ a_a = ti.root.bitmasked(ti.i, 4)
+ a_b = a_a.dense(ti.i, 4)
+ a_b.place(a)
+ c = ti.field(ti.i32)
+ ti.root.place(c)
+
+ @ti.kernel
+ def run():
+ a[0] = 123
+
+ @ti.kernel
+ def is_active():
+ c[None] = ti.is_active(a_a, [0])
+
+ run()
+ is_active()
+ assert c[None] == 1
+
+ ti.root.deactivate_all()
+ is_active()
+ assert c[None] == 0
diff --git a/tests/_python_orig/test_bls.py b/tests/_python_orig/test_bls.py
new file mode 100644
index 000000000..b4116bbf0
--- /dev/null
+++ b/tests/_python_orig/test_bls.py
@@ -0,0 +1,176 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_simple_1d():
+ x, y = ti.field(ti.f32), ti.field(ti.f32)
+
+ N = 64
+ bs = 16
+
+ ti.root.pointer(ti.i, N // bs).dense(ti.i, bs).place(x, y)
+
+ @ti.kernel
+ def populate():
+ for i in range(N):
+ x[i] = i
+
+ @ti.kernel
+ def copy():
+ ti.block_local(x)
+ for i in x:
+ y[i] = x[i]
+
+ populate()
+ copy()
+
+ for i in range(N):
+ assert y[i] == i
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_simple_2d():
+ x, y = ti.field(ti.f32), ti.field(ti.f32)
+
+ N = 16
+ bs = 16
+
+ ti.root.pointer(ti.ij, N // bs).dense(ti.ij, bs).place(x, y)
+
+ @ti.kernel
+ def populate():
+ for i, j in ti.ndrange(N, N):
+ x[i, j] = i - j
+
+ @ti.kernel
+ def copy():
+ ti.block_local(x)
+ for i, j in x:
+ y[i, j] = x[i, j]
+
+ populate()
+ copy()
+
+ for i in range(N):
+ for j in range(N):
+ assert y[i, j] == i - j
+
+
+def _test_bls_stencil(*args, **kwargs):
+ from .bls_test_template import bls_test_template
+ bls_test_template(*args, **kwargs)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gather_1d_trivial():
+ # y[i] = x[i]
+ _test_bls_stencil(1, 128, bs=32, stencil=((0, ), ))
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gather_1d():
+ # y[i] = x[i - 1] + x[i]
+ _test_bls_stencil(1, 128, bs=32, stencil=((-1, ), (0, )))
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gather_2d():
+ stencil = [(0, 0), (0, -1), (0, 1), (1, 0)]
+ _test_bls_stencil(2, 128, bs=16, stencil=stencil)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gather_2d_nonsquare():
+ stencil = [(0, 0), (0, -1), (0, 1), (1, 0)]
+ _test_bls_stencil(2, 128, bs=(4, 16), stencil=stencil)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gather_3d():
+ stencil = [(-1, -1, -1), (2, 0, 1)]
+ _test_bls_stencil(3, 64, bs=(4, 8, 16), stencil=stencil)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_scatter_1d_trivial():
+ # y[i] = x[i]
+ _test_bls_stencil(1, 128, bs=32, stencil=((0, ), ), scatter=True)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_scatter_1d():
+ _test_bls_stencil(1, 128, bs=32, stencil=(
+ (1, ),
+ (0, ),
+ ), scatter=True)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_scatter_2d():
+ stencil = [(0, 0), (0, -1), (0, 1), (1, 0)]
+ _test_bls_stencil(2, 128, bs=16, stencil=stencil, scatter=True)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_multiple_inputs():
+ x, y, z, w, w2 = ti.field(ti.i32), ti.field(ti.i32), ti.field(
+ ti.i32), ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 128
+ bs = 8
+
+ ti.root.pointer(ti.ij, N // bs).dense(ti.ij, bs).place(x, y, z, w, w2)
+
+ @ti.kernel
+ def populate():
+ for i, j in ti.ndrange((bs, N - bs), (bs, N - bs)):
+ x[i, j] = i - j
+ y[i, j] = i + j * j
+ z[i, j] = i * i - j
+
+ @ti.kernel
+ def copy(bls: ti.template(), w: ti.template()):
+ if ti.static(bls):
+ ti.block_local(x, y, z)
+ for i, j in x:
+ w[i,
+ j] = x[i, j - 2] + y[i + 2, j -
+ 1] + y[i - 1, j] + z[i - 1, j] + z[i + 1, j]
+
+ populate()
+ copy(False, w2)
+ copy(True, w)
+
+ for i in range(N):
+ for j in range(N):
+ assert w[i, j] == w2[i, j]
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_bls_large_block():
+ n = 2**10
+ block_size = 32
+ stencil_length = 28 # uses 60 * 60 * 4B = 14.0625KiB shared memory
+
+ a = ti.field(dtype=ti.f32)
+ b = ti.field(dtype=ti.f32)
+ block = ti.root.pointer(ti.ij, n // block_size)
+ block.dense(ti.ij, block_size).place(a)
+ block.dense(ti.ij, block_size).place(b)
+
+ @ti.kernel
+ def foo():
+ ti.block_dim(512)
+ ti.block_local(a)
+ for i, j in a:
+ for k in range(stencil_length):
+ b[i, j] += a[i + k, j]
+ b[i, j] += a[i, j + k]
+
+ foo()
+
+
+# TODO: BLS on CPU
+# TODO: BLS boundary out of bound
+# TODO: BLS with TLS
diff --git a/tests/_python_orig/test_bls_assume_in_range.py b/tests/_python_orig/test_bls_assume_in_range.py
new file mode 100644
index 000000000..c7dc0cbe3
--- /dev/null
+++ b/tests/_python_orig/test_bls_assume_in_range.py
@@ -0,0 +1,53 @@
+import taichi as ti
+from tests import test_utils
+
+from .bls_test_template import bls_particle_grid
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_scattering():
+ bls_particle_grid(N=128,
+ ppc=10,
+ block_size=8,
+ scatter=True,
+ use_offset=False)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_scattering_offset():
+ bls_particle_grid(N=128,
+ ppc=10,
+ block_size=8,
+ scatter=True,
+ use_offset=True)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_scattering_two_pointer_levels():
+ bls_particle_grid(N=128,
+ ppc=10,
+ block_size=8,
+ scatter=True,
+ pointer_level=2,
+ use_offset=False)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gathering():
+ bls_particle_grid(N=128,
+ ppc=10,
+ block_size=8,
+ scatter=False,
+ use_offset=False)
+
+
+@test_utils.test(require=ti.extension.bls)
+def test_gathering_offset():
+ bls_particle_grid(N=128,
+ ppc=10,
+ block_size=8,
+ scatter=False,
+ use_offset=True)
+
+
+# TODO: debug mode behavior of assume_in_range
diff --git a/tests/_python_orig/test_bool_op.py b/tests/_python_orig/test_bool_op.py
new file mode 100644
index 000000000..d15160e68
--- /dev/null
+++ b/tests/_python_orig/test_bool_op.py
@@ -0,0 +1,68 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(debug=True, short_circuit_operators=True)
+def test_and_shorted():
+ a = ti.field(ti.i32, shape=10)
+
+ @ti.func
+ def explode() -> ti.i32:
+ return a[-1]
+
+ @ti.kernel
+ def func() -> ti.i32:
+ return False and explode()
+
+ assert func() == 0
+
+
+@test_utils.test(debug=True, short_circuit_operators=True)
+def test_and_not_shorted():
+ @ti.kernel
+ def func() -> ti.i32:
+ return True and False
+
+ assert func() == 0
+
+
+@test_utils.test(debug=True, short_circuit_operators=True)
+def test_or_shorted():
+ a = ti.field(ti.i32, shape=10)
+
+ @ti.func
+ def explode() -> ti.i32:
+ return a[-1]
+
+ @ti.kernel
+ def func() -> ti.i32:
+ return True or explode()
+
+ assert func() == 1
+
+
+@test_utils.test(debug=True, short_circuit_operators=True)
+def test_or_not_shorted():
+ @ti.kernel
+ def func() -> ti.i32:
+ return False or True
+
+ assert func() == 1
+
+
+@test_utils.test(debug=True)
+def test_static_or():
+ @ti.kernel
+ def func() -> ti.i32:
+ return ti.static(0 or 3 or 5)
+
+ assert func() == 3
+
+
+@test_utils.test(debug=True)
+def test_static_and():
+ @ti.kernel
+ def func() -> ti.i32:
+ return ti.static(5 and 2 and 0)
+
+ assert func() == 0
diff --git a/tests/_python_orig/test_callable_template_mapper.py b/tests/_python_orig/test_callable_template_mapper.py
new file mode 100644
index 000000000..d52be405d
--- /dev/null
+++ b/tests/_python_orig/test_callable_template_mapper.py
@@ -0,0 +1,54 @@
+from taichi.lang.kernel_impl import TaichiCallableTemplateMapper
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_callable_template_mapper():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.f32)
+
+ ti.root.place(x, y)
+
+ mapper = TaichiCallableTemplateMapper(
+ (ti.template(), ti.template(), ti.template()),
+ template_slot_locations=(0, 1, 2))
+ assert mapper.lookup((0, 0, 0))[0] == 0
+ assert mapper.lookup((0, 1, 0))[0] == 1
+ assert mapper.lookup((0, 0, 0))[0] == 0
+ assert mapper.lookup((0, 0, 1))[0] == 2
+ assert mapper.lookup((0, 1, 0))[0] == 1
+
+ mapper = TaichiCallableTemplateMapper((ti.i32, ti.i32, ti.i32), ())
+ assert mapper.lookup((0, 0, 0))[0] == 0
+ assert mapper.lookup((0, 1, 0))[0] == 0
+ assert mapper.lookup((0, 0, 0))[0] == 0
+ assert mapper.lookup((0, 0, 1))[0] == 0
+ assert mapper.lookup((0, 1, 0))[0] == 0
+
+ mapper = TaichiCallableTemplateMapper((ti.i32, ti.template(), ti.i32),
+ (1, ))
+ assert mapper.lookup((0, x, 0))[0] == 0
+ assert mapper.lookup((0, y, 0))[0] == 1
+ assert mapper.lookup((0, x, 1))[0] == 0
+
+
+@test_utils.test()
+def test_callable_template_mapper_numpy():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.f32)
+
+ ti.root.place(x, y)
+
+ annotations = (ti.template(), ti.template(), ti.ext_arr())
+
+ import numpy as np
+
+ mapper = TaichiCallableTemplateMapper(annotations, (0, 1, 2))
+ assert mapper.lookup((0, 0, np.ones(shape=(1, 2, 3),
+ dtype=np.float32)))[0] == 0
+ assert mapper.lookup((0, 0, np.ones(shape=(1, 2, 4),
+ dtype=np.float32)))[0] == 0
+ assert mapper.lookup((0, 0, np.ones(shape=(1, 2, 1),
+ dtype=np.int32)))[0] == 1
diff --git a/tests/_python_orig/test_cast.py b/tests/_python_orig/test_cast.py
new file mode 100644
index 000000000..0bcca774a
--- /dev/null
+++ b/tests/_python_orig/test_cast.py
@@ -0,0 +1,175 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@pytest.mark.parametrize('dtype', [ti.u8, ti.u16, ti.u32])
+@test_utils.test(exclude=ti.opengl)
+def test_cast_uint_to_float(dtype):
+ @ti.kernel
+ def func(a: dtype) -> ti.f32:
+ return ti.cast(a, ti.f32)
+
+ assert func(255) == 255
+
+
+@pytest.mark.parametrize('dtype', [ti.u8, ti.u16, ti.u32])
+@test_utils.test(exclude=ti.opengl)
+def test_cast_float_to_uint(dtype):
+ @ti.kernel
+ def func(a: ti.f32) -> dtype:
+ return ti.cast(a, dtype)
+
+ assert func(255) == 255
+
+
+@test_utils.test()
+def test_cast_f32():
+ z = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ z[None] = ti.cast(1e9, ti.f32) / ti.cast(1e6, ti.f32) + 1e-3
+
+ func()
+ assert z[None] == 1000
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_cast_f64():
+ z = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ z[None] = ti.cast(1e13, ti.f64) / ti.cast(1e10, ti.f64) + 1e-3
+
+ func()
+ assert z[None] == 1000
+
+
+@pytest.mark.parametrize('dtype', [ti.f32, ti.f64])
+def test_cast_default_fp(dtype):
+ ti.init(default_fp=dtype)
+
+ @ti.kernel
+ def func(x: int, y: int) -> float:
+ return ti.cast(x, float) * float(y)
+
+ assert func(23, 4) == pytest.approx(23.0 * 4.0)
+
+
+@pytest.mark.parametrize('dtype', [ti.i32, ti.i64])
+def test_cast_default_ip(dtype):
+ ti.init(default_ip=dtype)
+
+ @ti.kernel
+ def func(x: float, y: float) -> int:
+ return ti.cast(x, int) * int(y)
+
+ # make sure that int(4.6) == 4:
+ assert func(23.3, 4.6) == 23 * 4
+ if dtype == ti.i64:
+ large = 1000000000
+ assert func(large, 233) == large * 233
+ assert func(233, large) == 233 * large
+
+
+@test_utils.test()
+def test_cast_within_while():
+ ret = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ t = 10
+ while t > 5:
+ t = 1.0
+ break
+ ret[None] = t
+
+ func()
+
+
+@test_utils.test()
+def test_bit_cast():
+ x = ti.field(ti.i32, shape=())
+ y = ti.field(ti.f32, shape=())
+ z = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func1():
+ y[None] = ti.bit_cast(x[None], ti.f32)
+
+ @ti.kernel
+ def func2():
+ z[None] = ti.bit_cast(y[None], ti.i32)
+
+ x[None] = 2333
+ func1()
+ func2()
+ assert z[None] == 2333
+
+
+@test_utils.test(arch=ti.cpu)
+def test_int_extension():
+ x = ti.field(dtype=ti.i32, shape=2)
+ y = ti.field(dtype=ti.u32, shape=2)
+
+ a = ti.field(dtype=ti.i8, shape=1)
+ b = ti.field(dtype=ti.u8, shape=1)
+
+ @ti.kernel
+ def run_cast_i32():
+ x[0] = ti.cast(a[0], ti.i32)
+ x[1] = ti.cast(b[0], ti.i32)
+
+ @ti.kernel
+ def run_cast_u32():
+ y[0] = ti.cast(a[0], ti.u32)
+ y[1] = ti.cast(b[0], ti.u32)
+
+ a[0] = -128
+ b[0] = -128
+
+ run_cast_i32()
+ assert x[0] == -128
+ assert x[1] == 128
+
+ run_cast_u32()
+ assert y[0] == 0xFFFFFF80
+ assert y[1] == 128
+
+
+@test_utils.test(arch=ti.cpu)
+def test_custom_int_extension():
+ x = ti.field(dtype=ti.i32, shape=2)
+ y = ti.field(dtype=ti.u32, shape=2)
+
+ ci5 = ti.types.quantized_types.quant.int(5, True, ti.i16)
+ cu7 = ti.types.quantized_types.quant.int(7, False, ti.u16)
+
+ a = ti.field(dtype=ci5)
+ b = ti.field(dtype=cu7)
+
+ ti.root.bit_struct(num_bits=32).place(a, b)
+
+ @ti.kernel
+ def run_cast_int():
+ x[0] = ti.cast(a[None], ti.i32)
+ x[1] = ti.cast(b[None], ti.i32)
+
+ @ti.kernel
+ def run_cast_uint():
+ y[0] = ti.cast(a[None], ti.u32)
+ y[1] = ti.cast(b[None], ti.u32)
+
+ a[None] = -16
+ b[None] = -64
+
+ run_cast_int()
+ assert x[0] == -16
+ assert x[1] == 64
+
+ run_cast_uint()
+ assert y[0] == 0xFFFFFFF0
+ assert y[1] == 64
diff --git a/tests/_python_orig/test_classfunc.py b/tests/_python_orig/test_classfunc.py
new file mode 100644
index 000000000..95c4b2c6f
--- /dev/null
+++ b/tests/_python_orig/test_classfunc.py
@@ -0,0 +1,23 @@
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_classfunc():
+ @ti.data_oriented
+ class Foo:
+ def __init__(self):
+ self.val = ti.Matrix.field(n=3, m=3, dtype=ti.f32, shape=3)
+
+ @ti.func
+ def add_mat(self, a, b):
+ return a + b
+
+ @ti.kernel
+ def fill(self):
+ self.val[0] = self.add_mat(self.val[1], self.val[2])
+
+ foo = Foo()
+ foo.fill()
diff --git a/tests/_python_orig/test_clear_all_gradients.py b/tests/_python_orig/test_clear_all_gradients.py
new file mode 100644
index 000000000..f93e78049
--- /dev/null
+++ b/tests/_python_orig/test_clear_all_gradients.py
@@ -0,0 +1,40 @@
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.vulkan])
+def test_clear_all_gradients():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+ z = ti.field(ti.f32)
+ w = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.place(x)
+ ti.root.dense(ti.i, n).place(y)
+ ti.root.dense(ti.i, n).dense(ti.j, n).place(z, w)
+ ti.root.lazy_grad()
+
+ x.grad[None] = 3
+ for i in range(n):
+ y.grad[i] = 3
+ for j in range(n):
+ z.grad[i, j] = 5
+ w.grad[i, j] = 6
+
+ ti.clear_all_gradients()
+ assert impl.get_runtime().get_num_compiled_functions() == 3
+
+ assert x.grad[None] == 0
+ for i in range(n):
+ assert y.grad[i] == 0
+ for j in range(n):
+ assert z.grad[i, j] == 0
+ assert w.grad[i, j] == 0
+
+ ti.clear_all_gradients()
+ # No more kernel compilation
+ assert impl.get_runtime().get_num_compiled_functions() == 3
diff --git a/tests/_python_orig/test_cli.py b/tests/_python_orig/test_cli.py
new file mode 100644
index 000000000..e7ed85fba
--- /dev/null
+++ b/tests/_python_orig/test_cli.py
@@ -0,0 +1,221 @@
+import argparse
+import copy
+import sys
+from contextlib import contextmanager
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from taichi.main import TaichiMain
+
+import taichi as ti
+
+
+@contextmanager
+def patch_sys_argv_helper(custom_argv: list):
+ """Temporarily patch sys.argv for testing."""
+ try:
+ cached_argv = copy.deepcopy(sys.argv)
+ sys.argv = custom_argv
+ yield sys.argv
+ finally:
+ sys.argv = cached_argv
+
+
+def test_cli_exit_one_with_no_command_provided():
+ with patch_sys_argv_helper(["ti"]):
+ cli = TaichiMain(test_mode=True)
+ assert cli() == 1
+
+
+def test_cli_exit_one_with_bogus_command_provided():
+ with patch_sys_argv_helper(["ti", "bogus-command-not-registered-yet"]):
+ cli = TaichiMain(test_mode=True)
+ assert cli() == 1
+
+
+def test_cli_can_dispatch_commands_to_methods_correctly():
+ with patch_sys_argv_helper(
+ ["ti", "example", "bogus_example_name_for_test"]):
+ with patch.object(TaichiMain, 'example',
+ return_value=None) as mock_method:
+ cli = TaichiMain(test_mode=False)
+ cli()
+ mock_method.assert_called_once_with(
+ ["bogus_example_name_for_test"])
+
+
+def test_cli_example():
+ with patch_sys_argv_helper(["ti", "example", "minimal"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.name == "minimal"
+
+ with patch_sys_argv_helper(["ti", "example", "minimal.py"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.name == "minimal"
+
+ with patch_sys_argv_helper(["ti", "example", "-s",
+ "minimal.py"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.name == "minimal" and args.save == True
+
+ with patch_sys_argv_helper(["ti", "example", "-p",
+ "minimal.py"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.name == "minimal" and args.print == True
+
+ with patch_sys_argv_helper(["ti", "example", "-P",
+ "minimal.py"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.name == "minimal" and args.pretty_print == True
+
+
+def test_cli_gif():
+ with patch_sys_argv_helper(["ti", "gif", "-i", "video.mp4", "-f",
+ "30"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.input_file == "video.mp4"
+ assert args.framerate == 30
+ assert args.output_file == "video.gif"
+
+ with patch_sys_argv_helper(["ti", "gif", "-i", "video.mp3", "-f",
+ "30"]) as custom_argv:
+ with pytest.raises(SystemExit) as pytest_wrapped_err:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert pytest_wrapped_err.__context__.type == argparse.ArgumentTypeError
+
+
+def test_cli_video_speed():
+ with patch_sys_argv_helper(
+ ["ti", "video_speed", "-i", "video.mp4", "-s", "2.0"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.input_file == "video.mp4"
+ assert args.speed == 2.0
+ assert args.output_file == "video-sped.mp4"
+
+ with patch_sys_argv_helper(
+ ["ti", "video_speed", "-i", "video.mp3", "-s", "2.0"]) as custom_argv:
+ with pytest.raises(SystemExit) as pytest_wrapped_err:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert pytest_wrapped_err.__context__.type == argparse.ArgumentTypeError
+
+
+def test_cli_video_crop():
+ with patch_sys_argv_helper([
+ "ti", "video_crop", "-i", "video.mp4", "--x1", "10.0", "--x2",
+ "20.0", "--y1", "10.0", "--y2", "20.0"
+ ]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.input_file == "video.mp4"
+ assert args.x_begin == 10.0
+ assert args.x_end == 20.0
+ assert args.y_begin == 10.0
+ assert args.y_end == 20.0
+ assert args.output_file == "video-cropped.mp4"
+
+ with patch_sys_argv_helper([
+ "ti", "video_crop", "-i", "video.mp3", "--x1", "10.0", "--x2",
+ "20.0", "--y1", "10.0", "--y2", "20.0"
+ ]) as custom_argv:
+ with pytest.raises(SystemExit) as pytest_wrapped_err:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert pytest_wrapped_err.__context__.type == argparse.ArgumentTypeError
+
+
+def test_cli_video_scale():
+ with patch_sys_argv_helper(
+ ["ti", "video_scale", "-i", "video.mp4", "-w", "1.2"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.input_file == "video.mp4"
+ assert args.ratio_width == 1.2
+ assert args.ratio_height == 1.2
+ assert args.output_file == "video-scaled.mp4"
+
+ with patch_sys_argv_helper([
+ "ti", "video_scale", "-i", "video.mp4", "-w", "1.2",
+ "--ratio-height", "1.5"
+ ]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.input_file == "video.mp4"
+ assert args.ratio_width == 1.2
+ assert args.ratio_height == 1.5
+ assert args.output_file == "video-scaled.mp4"
+
+ with patch_sys_argv_helper([
+ "ti", "video_scale", "-i", "video.mp3", "-w", "1.2",
+ "--ratio-height", "1.5"
+ ]) as custom_argv:
+ with pytest.raises(SystemExit) as pytest_wrapped_err:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert pytest_wrapped_err.__context__.type == argparse.ArgumentTypeError
+
+
+def test_cli_video():
+ with patch_sys_argv_helper(
+ ["ti", "video", "image.gif", "-o", "video.mp4", "-f",
+ "30"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.inputs == ["image.gif"]
+ assert args.framerate == 30
+ assert isinstance(args.output_file, Path)
+ assert args.output_file.name == "video.mp4"
+
+ with patch_sys_argv_helper(["ti", "video", "-o", "video.mp4", "-f",
+ "30"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert isinstance(args.inputs, list)
+ assert args.framerate == 30
+ assert isinstance(args.output_file, Path)
+ assert args.output_file.name == "video.mp4"
+
+
+def test_cli_regression():
+ with patch_sys_argv_helper(["ti", "regression", "a.py", "b.py",
+ "-g"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.files == ["a.py", "b.py"]
+ assert args.gui == True
+
+
+def test_cli_benchmark():
+ with patch_sys_argv_helper(
+ ["ti", "benchmark", "a.py", "b.py", "-T", "-v", "-r2",
+ "-t4"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.files == ["a.py", "b.py"]
+ assert args.tprt == True
+ assert args.verbose == True
+ assert args.rerun == "2"
+ assert args.threads == "4"
+
+
+def test_cli_debug():
+ with patch_sys_argv_helper(["ti", "debug", "a.py"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.filename == "a.py"
+
+
+def test_cli_run():
+ with patch_sys_argv_helper(["ti", "run", "a.py"]) as custom_argv:
+ cli = TaichiMain(test_mode=True)
+ args = cli()
+ assert args.filename == "a.py"
diff --git a/tests/_python_orig/test_compare.py b/tests/_python_orig/test_compare.py
new file mode 100644
index 000000000..30327f78c
--- /dev/null
+++ b/tests/_python_orig/test_compare.py
@@ -0,0 +1,172 @@
+import pytest
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_compare_basics():
+ a = ti.field(ti.i32)
+ ti.root.dynamic(ti.i, 256).place(a)
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ b[None] = 3
+ c[None] = 5
+ a[0] = b[None] < c[None]
+ a[1] = b[None] <= c[None]
+ a[2] = b[None] > c[None]
+ a[3] = b[None] >= c[None]
+ a[4] = b[None] == c[None]
+ a[5] = b[None] != c[None]
+ a[6] = c[None] < b[None]
+ a[7] = c[None] <= b[None]
+ a[8] = c[None] > b[None]
+ a[9] = c[None] >= b[None]
+ a[10] = c[None] == b[None]
+ a[11] = c[None] != b[None]
+
+ func()
+ assert a[0]
+ assert a[1]
+ assert not a[2]
+ assert not a[3]
+ assert not a[4]
+ assert a[5]
+ assert not a[6]
+ assert not a[7]
+ assert a[8]
+ assert a[9]
+ assert not a[10]
+ assert a[11]
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_compare_equality():
+ a = ti.field(ti.i32)
+ ti.root.dynamic(ti.i, 256).place(a)
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ b[None] = 3
+ c[None] = 3
+ a[0] = b[None] < c[None]
+ a[1] = b[None] <= c[None]
+ a[2] = b[None] > c[None]
+ a[3] = b[None] >= c[None]
+ a[4] = b[None] == c[None]
+ a[5] = b[None] != c[None]
+ a[6] = c[None] < b[None]
+ a[7] = c[None] <= b[None]
+ a[8] = c[None] > b[None]
+ a[9] = c[None] >= b[None]
+ a[10] = c[None] == b[None]
+ a[11] = c[None] != b[None]
+
+ func()
+ assert not a[0]
+ assert a[1]
+ assert not a[2]
+ assert a[3]
+ assert a[4]
+ assert not a[5]
+ assert not a[6]
+ assert a[7]
+ assert not a[8]
+ assert a[9]
+ assert a[10]
+ assert not a[11]
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_no_duplicate_eval():
+ a = ti.field(ti.i32)
+ ti.root.dynamic(ti.i, 256).place(a)
+
+ @ti.kernel
+ def func():
+ a[2] = 0 <= ti.append(a.parent(), [], 10) < 1
+
+ func()
+ assert a[0] == 10
+ assert a[1] == 0 # not appended twice
+ assert a[2] # ti.append returns 0
+
+
+@test_utils.test()
+def test_no_duplicate_eval_func():
+ a = ti.field(ti.i32, ())
+ b = ti.field(ti.i32, ())
+
+ @ti.func
+ def why_this_foo_fail(n):
+ return ti.atomic_add(b[None], n)
+
+ def foo(n):
+ return ti.atomic_add(impl.subscript(b, None), n)
+
+ @ti.kernel
+ def func():
+ a[None] = 0 <= foo(2) < 1
+
+ func()
+ assert a[None] == 1
+ assert b[None] == 2
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_chain_compare():
+ a = ti.field(ti.i32)
+ ti.root.dynamic(ti.i, 256).place(a)
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+ d = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ b[None] = 2
+ c[None] = 3
+ d[None] = 3
+ a[0] = c[None] == d[None] != b[None] < d[None] > b[None] >= b[
+ None] <= c[None]
+ a[1] = b[None] <= c[None] != d[None] > b[None] == b[None]
+
+ func()
+ assert a[0]
+ assert not a[1]
+
+
+@test_utils.test()
+def test_static_in():
+ @ti.kernel
+ def foo(a: ti.template()) -> ti.i32:
+ b = 0
+ if ti.static(a in [ti.i32, ti.u32]):
+ b = 1
+ elif ti.static(a not in [ti.f32, ti.f64]):
+ b = 2
+ return b
+
+ assert foo(ti.u32) == 1
+ assert foo(ti.i64) == 2
+ assert foo(ti.f32) == 0
+
+
+@test_utils.test()
+def test_non_static_in():
+ with pytest.raises(ti.TaichiCompilationError,
+ match='"In" is only supported inside `ti.static`.'):
+
+ @ti.kernel
+ def foo(a: ti.template()) -> ti.i32:
+ b = 0
+ if a in [ti.i32, ti.u32]:
+ b = 1
+ return b
+
+ foo(ti.i32)
diff --git a/tests/_python_orig/test_complex_struct.py b/tests/_python_orig/test_complex_struct.py
new file mode 100644
index 000000000..cc698e8eb
--- /dev/null
+++ b/tests/_python_orig/test_complex_struct.py
@@ -0,0 +1,174 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_complex_dense():
+ a = ti.field(ti.i32, shape=(4, 4))
+ b = ti.field(ti.i32, shape=(16, 16))
+ c = ti.field(ti.i32, shape=(16, 4))
+ d = ti.field(ti.i32, shape=(4, 4, 4))
+
+ w = ti.field(ti.i32)
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ z = ti.field(ti.i32)
+
+ blk = ti.root.dense(ti.ij, 4)
+ blk.place(w)
+ blk.dense(ti.ij, 2).dense(ti.ij, 2).place(x)
+ blk.dense(ti.i, 4).place(y)
+ blk.dense(ti.k, 4).place(z)
+
+ @ti.kernel
+ def set_w():
+ for I in ti.grouped(ti.ndrange(4, 4)):
+ w[I] = 1
+
+ @ti.kernel
+ def set_x():
+ for I in ti.grouped(ti.ndrange(16, 16)):
+ x[I] = 2
+
+ @ti.kernel
+ def set_y():
+ for I in ti.grouped(ti.ndrange(16, 4)):
+ y[I] = 3
+
+ @ti.kernel
+ def set_z():
+ for I in ti.grouped(ti.ndrange(4, 4, 4)):
+ z[I] = 4
+
+ @ti.kernel
+ def set_a():
+ for I in ti.grouped(w):
+ a[I] = w[I]
+
+ @ti.kernel
+ def set_b():
+ for I in ti.grouped(x):
+ b[I] = x[I]
+
+ @ti.kernel
+ def set_c():
+ for I in ti.grouped(y):
+ c[I] = y[I]
+
+ @ti.kernel
+ def set_d():
+ for I in ti.grouped(z):
+ d[I] = z[I]
+
+ set_w()
+ set_x()
+ set_y()
+ set_z()
+
+ set_a()
+ set_b()
+ set_c()
+ set_d()
+
+ for i in range(4):
+ for j in range(4):
+ assert a[i, j] == 1
+
+ for i in range(16):
+ for j in range(16):
+ assert b[i, j] == 2
+
+ for i in range(16):
+ for j in range(4):
+ assert c[i, j] == 3
+
+ for i in range(4):
+ for j in range(4):
+ for k in range(4):
+ assert d[i, j, k] == 4
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_complex_pointer():
+ a = ti.field(ti.i32, shape=(4, 4))
+ b = ti.field(ti.i32, shape=(16, 16))
+ c = ti.field(ti.i32, shape=(16, 4))
+ d = ti.field(ti.i32, shape=(4, 4, 4))
+
+ w = ti.field(ti.i32)
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ z = ti.field(ti.i32)
+
+ blk = ti.root.pointer(ti.ij, 4)
+ blk.place(w)
+ blk.pointer(ti.ij, 2).dense(ti.ij, 2).place(x)
+ blk.dense(ti.i, 4).place(y)
+ blk.dense(ti.k, 4).place(z)
+
+ @ti.kernel
+ def set_w():
+ for I in ti.grouped(ti.ndrange(4, 4)):
+ w[I] = 1
+
+ @ti.kernel
+ def set_x():
+ for I in ti.grouped(ti.ndrange(16, 16)):
+ x[I] = 2
+
+ @ti.kernel
+ def set_y():
+ for I in ti.grouped(ti.ndrange(16, 4)):
+ y[I] = 3
+
+ @ti.kernel
+ def set_z():
+ for I in ti.grouped(ti.ndrange(4, 4, 4)):
+ z[I] = 4
+
+ @ti.kernel
+ def set_a():
+ for I in ti.grouped(w):
+ a[I] = w[I]
+
+ @ti.kernel
+ def set_b():
+ for I in ti.grouped(x):
+ b[I] = x[I]
+
+ @ti.kernel
+ def set_c():
+ for I in ti.grouped(y):
+ c[I] = y[I]
+
+ @ti.kernel
+ def set_d():
+ for I in ti.grouped(z):
+ d[I] = z[I]
+
+ set_w()
+ set_x()
+ set_y()
+ set_z()
+
+ set_a()
+ set_b()
+ set_c()
+ set_d()
+
+ for i in range(4):
+ for j in range(4):
+ assert a[i, j] == 1
+
+ for i in range(16):
+ for j in range(16):
+ assert b[i, j] == 2
+
+ for i in range(16):
+ for j in range(4):
+ assert c[i, j] == 3
+
+ for i in range(4):
+ for j in range(4):
+ for k in range(4):
+ assert d[i, j, k] == 4
diff --git a/tests/_python_orig/test_constant_fold.py b/tests/_python_orig/test_constant_fold.py
new file mode 100644
index 000000000..98aca30e9
--- /dev/null
+++ b/tests/_python_orig/test_constant_fold.py
@@ -0,0 +1,19 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_constant_fold():
+ n = 100
+
+ @ti.kernel
+ def series() -> int:
+ s = 0
+ for i in ti.static(range(n)):
+ a = i + 1
+ s += a * a
+ return s
+
+ # \sum_{i=1}^n (i^2) = n * (n + 1) * (2n + 1) / 6
+ expected = n * (n + 1) * (2 * n + 1) // 6
+ assert series() == expected
diff --git a/tests/_python_orig/test_continue.py b/tests/_python_orig/test_continue.py
new file mode 100644
index 000000000..cdb13d54c
--- /dev/null
+++ b/tests/_python_orig/test_continue.py
@@ -0,0 +1,149 @@
+import taichi as ti
+from tests import test_utils
+
+n = 1000
+
+
+@test_utils.test()
+def test_for_continue():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run():
+ # Launch just one thread
+ for _ in range(1):
+ for j in range(n):
+ if j % 2 == 0:
+ continue
+ x[j] = j
+
+ run()
+ xs = x.to_numpy()
+ for i in range(n):
+ expect = 0 if i % 2 == 0 else i
+ assert xs[i] == expect
+
+
+@test_utils.test()
+def test_while_continue():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run():
+ # Launch just one thread
+ for _ in range(1):
+ j = 0
+ while j < n:
+ oj = j
+ j += 1
+ if oj % 2 == 0:
+ continue
+ x[oj] = oj
+
+ run()
+ xs = x.to_numpy()
+ for i in range(n):
+ expect = 0 if i % 2 == 0 else i
+ assert xs[i] == expect
+
+
+@test_utils.test()
+def test_kernel_continue():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run():
+ for i in range(n):
+ if i % 2 == 0:
+ # At kernel level, this is the same as return
+ continue
+ x[i] = i
+
+ run()
+ xs = x.to_numpy()
+ for i in range(n):
+ expect = 0 if i % 2 == 0 else i
+ assert xs[i] == expect
+
+
+@test_utils.test()
+def test_unconditional_continue():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run():
+ # Launch just one thread
+ for _ in range(1):
+ for j in range(n):
+ continue
+ # pylint: disable=unreachable
+ x[j] = j
+
+ run()
+ xs = x.to_numpy()
+ for i in range(n):
+ assert xs[i] == 0
+
+
+@test_utils.test()
+def test_kernel_continue_in_nested_if():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run(a: ti.i32):
+ for i in range(1):
+ if a:
+ if a:
+ continue
+ if a:
+ if a:
+ continue
+ x[i] = i
+
+ x[0] = 1
+ run(1)
+ assert x[0] == 1
+ run(0)
+ assert x[0] == 0
+
+
+@test_utils.test()
+def test_kernel_continue_in_nested_if_2():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run(a: ti.i32):
+ for i in range(1):
+ if a:
+ if a:
+ continue
+ if a:
+ continue
+ x[i] = i
+
+ x[0] = 1
+ run(1)
+ assert x[0] == 1
+ run(0)
+ assert x[0] == 0
+
+
+@test_utils.test()
+def test_kernel_continue_in_nested_if_3():
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def run(a: ti.i32):
+ for i in range(1):
+ if a:
+ continue
+ if a:
+ if a:
+ continue
+ x[i] = i
+
+ x[0] = 1
+ run(1)
+ assert x[0] == 1
+ run(0)
+ assert x[0] == 0
diff --git a/tests/_python_orig/test_copy_from.py b/tests/_python_orig/test_copy_from.py
new file mode 100644
index 000000000..f546477e1
--- /dev/null
+++ b/tests/_python_orig/test_copy_from.py
@@ -0,0 +1,25 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_scalar():
+ n = 16
+
+ x = ti.field(ti.i32, shape=n)
+ y = ti.field(ti.i32, shape=n)
+
+ x[1] = 2
+
+ y[0] = 1
+ y[2] = 3
+
+ x.copy_from(y)
+
+ assert x[0] == 1
+ assert x[1] == 0
+ assert x[2] == 3
+
+ assert y[0] == 1
+ assert y[1] == 0
+ assert y[2] == 3
diff --git a/tests/_python_orig/test_cuda_internals.py b/tests/_python_orig/test_cuda_internals.py
new file mode 100644
index 000000000..1c2e3e3ae
--- /dev/null
+++ b/tests/_python_orig/test_cuda_internals.py
@@ -0,0 +1,37 @@
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+# TODO: these are not really tests...
+
+
+@test_utils.test(arch=ti.cuda)
+def test_do_nothing():
+ @ti.kernel
+ def test():
+ for i in range(10):
+ impl.call_internal("do_nothing")
+
+ test()
+
+
+@test_utils.test(arch=ti.cuda)
+def test_active_mask():
+ @ti.kernel
+ def test():
+ for i in range(48):
+ if i % 2 == 0:
+ impl.call_internal("test_active_mask")
+
+ test()
+
+
+@test_utils.test(arch=ti.cuda)
+def test_shfl_down():
+ @ti.kernel
+ def test():
+ for i in range(32):
+ impl.call_internal("test_shfl")
+
+ test()
diff --git a/tests/_python_orig/test_custom_float.py b/tests/_python_orig/test_custom_float.py
new file mode 100644
index 000000000..aae233620
--- /dev/null
+++ b/tests/_python_orig/test_custom_float.py
@@ -0,0 +1,92 @@
+import math
+
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_custom_float():
+ cft = ti.types.quantized_types.quant.fixed(frac=32, num_range=2)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ @ti.kernel
+ def foo():
+ x[None] = 0.7
+ print(x[None])
+ x[None] = x[None] + 0.4
+
+ foo()
+ assert x[None] == approx(1.1)
+ x[None] = 0.64
+ assert x[None] == approx(0.64)
+ x[None] = 0.66
+ assert x[None] == approx(0.66)
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_custom_matrix_rotation():
+ cft = ti.types.quantized_types.quant.fixed(frac=16, num_range=1.2)
+
+ x = ti.Matrix.field(2, 2, dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x.get_scalar_field(0, 0),
+ x.get_scalar_field(0, 1))
+ ti.root.bit_struct(num_bits=32).place(x.get_scalar_field(1, 0),
+ x.get_scalar_field(1, 1))
+
+ x[None] = [[1.0, 0.0], [0.0, 1.0]]
+
+ @ti.kernel
+ def rotate_18_degrees():
+ angle = math.pi / 10
+ x[None] = x[None] @ ti.Matrix(
+ [[ti.cos(angle), ti.sin(angle)], [-ti.sin(angle),
+ ti.cos(angle)]])
+
+ for i in range(5):
+ rotate_18_degrees()
+ assert x[None][0, 0] == approx(0, abs=1e-4)
+ assert x[None][0, 1] == approx(1, abs=1e-4)
+ assert x[None][1, 0] == approx(-1, abs=1e-4)
+ assert x[None][1, 1] == approx(0, abs=1e-4)
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_custom_float_implicit_cast():
+ ci13 = ti.types.quantized_types.quant.int(bits=13)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=ci13, scale=0.1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ @ti.kernel
+ def foo():
+ x[None] = 10
+
+ foo()
+ assert x[None] == approx(10.0)
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_cache_read_only():
+ ci15 = ti.types.quantized_types.quant.int(bits=15)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=ci15, scale=0.1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ @ti.kernel
+ def test(data: ti.f32):
+ ti.cache_read_only(x)
+ assert x[None] == data
+
+ x[None] = 0.7
+ test(0.7)
+ x[None] = 1.2
+ test(1.2)
diff --git a/tests/_python_orig/test_custom_float_exponents.py b/tests/_python_orig/test_custom_float_exponents.py
new file mode 100644
index 000000000..6ede9f03a
--- /dev/null
+++ b/tests/_python_orig/test_custom_float_exponents.py
@@ -0,0 +1,135 @@
+import numpy as np
+import pytest
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.quant)
+def test_custom_float_unsigned():
+ cu13 = ti.types.quantized_types.quant.int(13, False)
+ exp = ti.types.quantized_types.quant.int(6, False)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cu13, exponent_type=exp, scale=1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ tests = [
+ 0, 1 / 1024, 1.75 / 1024, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5, 6, 7, 128,
+ 256, 512, 1024
+ ]
+
+ assert x[None] == 0
+
+ for v in tests:
+ x[None] = v
+ assert x[None] == v
+
+
+@test_utils.test(require=ti.extension.quant)
+def test_custom_float_signed():
+ cu13 = ti.types.quantized_types.quant.int(13, True)
+ exp = ti.types.quantized_types.quant.int(6, False)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cu13, exponent_type=exp, scale=1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ tests = [0, 0.125, 0.5, 2, 4, 6, 7, 8, 9]
+
+ assert x[None] == 0
+
+ for v in tests:
+ x[None] = v
+ assert x[None] == v
+
+ x[None] = -v
+ assert x[None] == -v
+
+ ftz_tests = [1e-30, 1e-20, 1e-10, 1e-2]
+ for v in ftz_tests:
+ x[None] = v
+ assert x[None] == approx(v, abs=1e-5)
+
+ x[None] = -v
+ assert x[None] == approx(-v, abs=1e-5)
+
+
+@pytest.mark.parametrize('digits_bits', [23, 24])
+@test_utils.test(require=ti.extension.quant)
+def test_custom_float_precision(digits_bits):
+ cu24 = ti.types.quantized_types.quant.int(digits_bits, True)
+ exp = ti.types.quantized_types.quant.int(8, False)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cu24, exponent_type=exp, scale=1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ tests = [np.float32(np.pi), np.float32(np.pi * (1 << 100))]
+
+ for v in tests:
+ x[None] = v
+ if digits_bits == 24:
+ # Sufficient digits
+ assert x[None] == v
+ else:
+ # The binary representation of np.float32(np.pi) ends with 1, so removing one digit will result in a different number.
+ assert x[None] != v
+ assert x[None] == pytest.approx(v, rel=3e-7)
+
+
+@pytest.mark.parametrize('signed', [True, False])
+@test_utils.test(require=ti.extension.quant)
+def test_custom_float_truncation(signed):
+ cit = ti.types.quantized_types.quant.int(2, signed)
+ exp = ti.types.quantized_types.quant.int(5, False)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit, exponent_type=exp, scale=1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ # Sufficient digits
+ for v in [1, 1.5]:
+ x[None] = v
+ assert x[None] == v
+
+ x[None] = 1.75
+ if signed:
+ # Insufficient digits
+ assert x[None] == 2
+ else:
+ # Sufficient digits
+ assert x[None] == 1.75
+
+ # Insufficient digits
+ x[None] = 1.625
+ if signed:
+ assert x[None] == 1.5
+ else:
+ assert x[None] == 1.75
+
+
+@test_utils.test(require=ti.extension.quant)
+def test_custom_float_atomic_demotion():
+ cit = ti.types.quantized_types.quant.int(2, True)
+ exp = ti.types.quantized_types.quant.int(5, False)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit, exponent_type=exp, scale=1)
+ x = ti.field(dtype=cft)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ @ti.kernel
+ def foo():
+ for i in range(1):
+ x[None] += 1
+
+ foo()
+ foo()
+
+ assert x[None] == 2
diff --git a/tests/_python_orig/test_custom_float_shared_exp.py b/tests/_python_orig/test_custom_float_shared_exp.py
new file mode 100644
index 000000000..02e9da00b
--- /dev/null
+++ b/tests/_python_orig/test_custom_float_shared_exp.py
@@ -0,0 +1,167 @@
+import pytest
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+@pytest.mark.parametrize('exponent_bits', [5, 6, 7, 8])
+@test_utils.test(require=ti.extension.quant)
+def test_shared_exponents(exponent_bits):
+ exp = ti.types.quantized_types.quant.int(exponent_bits, False)
+ cit1 = ti.types.quantized_types.quant.int(10, False)
+ cit2 = ti.types.quantized_types.quant.int(14, False)
+ cft1 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit1, exponent_type=exp, scale=1)
+ cft2 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit2, exponent_type=exp, scale=1)
+ a = ti.field(dtype=cft1)
+ b = ti.field(dtype=cft2)
+ ti.root.bit_struct(num_bits=32).place(a, b, shared_exponent=True)
+
+ assert a[None] == 0.0
+ assert b[None] == 0.0
+
+ a[None] = 10
+ assert a[None] == 10.0
+ assert b[None] == 0.0
+
+ a[None] = 0
+ assert a[None] == 0.0
+ assert b[None] == 0.0
+
+ @ti.kernel
+ def foo(x: ti.f32, y: ti.f32):
+ a[None] = x
+ b[None] = y
+
+ foo(3.2, 0.25)
+
+ assert a[None] == approx(3.2, rel=1e-3)
+ assert b[None] == approx(0.25, rel=2e-2)
+ a[None] = 0.27
+ assert a[None] == approx(0.27, rel=1e-2)
+ assert b[None] == approx(0.25, rel=2e-2)
+ a[None] = 100
+ assert a[None] == approx(100, rel=1e-3)
+ assert b[None] == approx(0.25, rel=1e-2)
+
+ b[None] = 0
+ assert a[None] == approx(100, rel=1e-3)
+ assert b[None] == 0
+
+ foo(0, 0)
+ assert a[None] == 0.0
+ assert b[None] == 0.0
+
+ # test flush to zero
+ foo(1000, 1e-6)
+ assert a[None] == 1000.0
+ assert b[None] == 0.0
+
+ foo(1000, 1000)
+ assert a[None] == 1000.0
+ assert b[None] == 1000.0
+
+ foo(1e-30, 1e-30)
+ if exponent_bits == 8:
+ assert a[None] == approx(1e-30, 1e-3)
+ assert b[None] == approx(1e-30, 1e-4)
+ else:
+ # Insufficient exponent bits: should flush to zero
+ assert a[None] == 0
+ assert b[None] == 0
+
+
+@pytest.mark.parametrize('exponent_bits', [5, 6, 7, 8])
+@test_utils.test(require=ti.extension.quant)
+def test_shared_exponent_add(exponent_bits):
+ exp = ti.types.quantized_types.quant.int(exponent_bits, False)
+ cit1 = ti.types.quantized_types.quant.int(10, False)
+ cit2 = ti.types.quantized_types.quant.int(14, False)
+ cft1 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit1, exponent_type=exp, scale=1)
+ cft2 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit2, exponent_type=exp, scale=1)
+ a = ti.field(dtype=cft1)
+ b = ti.field(dtype=cft2)
+ ti.root.bit_struct(num_bits=32).place(a, b, shared_exponent=True)
+
+ @ti.kernel
+ def foo(x: ti.f32, y: ti.f32):
+ a[None] = x
+ b[None] = y
+
+ a[None] = 4
+ assert a[None] == 4
+ assert b[None] == 0
+ b[None] = 3
+ assert a[None] == 4
+ assert b[None] == 3
+
+ b[None] += 1
+
+ assert a[None] == 4
+ assert b[None] == 4
+
+ for i in range(100):
+ a[None] += 4
+ b[None] += 1
+ assert a[None] == 4 + (i + 1) * 4
+ assert b[None] == 4 + (i + 1)
+
+
+@pytest.mark.parametrize('exponent_bits', [5, 6, 7, 8])
+@test_utils.test(require=ti.extension.quant)
+def test_shared_exponent_borrow(exponent_bits):
+ exp = ti.types.quantized_types.quant.int(exponent_bits, False)
+ cit1 = ti.types.quantized_types.quant.int(10, False)
+ cit2 = ti.types.quantized_types.quant.int(14, False)
+ cft1 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit1, exponent_type=exp, scale=1)
+ cft2 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit2, exponent_type=exp, scale=1)
+ a = ti.field(dtype=cft1)
+ b = ti.field(dtype=cft2)
+ ti.root.bit_struct(num_bits=32).place(a, b, shared_exponent=True)
+
+ @ti.kernel
+ def foo(x: ti.f32, y: ti.f32):
+ a[None] = x
+ b[None] = y
+
+ def inc():
+ a[None] += 1
+ b[None] -= 1
+
+ foo(0, 100)
+
+ for i in range(100):
+ assert a[None] == i
+ assert b[None] == 100 - i
+ inc()
+
+
+@pytest.mark.parametrize('exponent_bits', [5, 6, 7, 8])
+@test_utils.test(require=ti.extension.quant)
+def test_negative(exponent_bits):
+ exp = ti.types.quantized_types.quant.int(exponent_bits, False)
+ cit1 = ti.types.quantized_types.quant.int(10, False)
+ cit2 = ti.types.quantized_types.quant.int(14, True)
+ cft1 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit1, exponent_type=exp, scale=1)
+ cft2 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit2, exponent_type=exp, scale=1)
+ a = ti.field(dtype=cft1)
+ b = ti.field(dtype=cft2)
+ ti.root.bit_struct(num_bits=32).place(a, b, shared_exponent=True)
+
+ a[None] = 37
+ assert a[None] == 37
+ b[None] = -123
+ assert b[None] == -123
+
+
+# TODO: test precision
+# TODO: make sure unsigned has one more effective significand bit
+# TODO: test shared exponent floats with custom int in a single bit struct
diff --git a/tests/_python_orig/test_custom_float_time_integration.py b/tests/_python_orig/test_custom_float_time_integration.py
new file mode 100644
index 000000000..00906efec
--- /dev/null
+++ b/tests/_python_orig/test_custom_float_time_integration.py
@@ -0,0 +1,63 @@
+import math
+
+import pytest
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+@pytest.mark.parametrize('use_cft,use_exponent,use_shared_exp',
+ [(False, False, False), (True, False, False),
+ (True, True, False), (True, True, True)])
+@test_utils.test(require=ti.extension.quant)
+def test_custom_float_time_integration(use_cft, use_exponent, use_shared_exp):
+ if use_cft:
+ if use_exponent:
+ exp = ti.types.quantized_types.quant.int(6, False)
+ cit = ti.types.quantized_types.quant.int(13, True)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit, exponent_type=exp, scale=1)
+ x = ti.Vector.field(2, dtype=cft)
+ if use_shared_exp:
+ ti.root.bit_struct(num_bits=32).place(x, shared_exponent=True)
+ else:
+ ti.root.bit_struct(num_bits=32).place(x.get_scalar_field(0))
+ ti.root.bit_struct(num_bits=32).place(x.get_scalar_field(1))
+ else:
+ cit = ti.types.quantized_types.quant.int(16, True)
+ cft = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit, scale=1 / 2**14)
+ x = ti.Vector.field(2, dtype=cft)
+ ti.root.bit_struct(num_bits=32).place(x)
+ else:
+ x = ti.Vector.field(2, dtype=ti.f32, shape=())
+
+ @ti.func
+ def v_at(p):
+ return ti.Vector([-p[1], p[0]])
+
+ @ti.kernel
+ def advance(dt: ti.f32):
+ v_mid = v_at(x[None] + 0.5 * dt * v_at(x[None]))
+ x[None] = x[None] + v_mid * dt
+
+ x[None] = [1, 0]
+ num_steps = 1000
+ dt = math.pi * 2 / num_steps
+ px = []
+ py = []
+
+ N = 1
+
+ for i in range(num_steps * N):
+ advance(dt)
+ px.append(x[None][0])
+ py.append(x[None][1])
+
+ assert px[num_steps // 2 - 1] == approx(-1, abs=2e-2)
+ assert py[num_steps // 2 - 1] == approx(0, abs=2e-2)
+
+ assert px[-1] == approx(1, abs=2e-2)
+ # TODO: why large error here?
+ assert py[-1] == approx(0, abs=3e-2)
diff --git a/tests/_python_orig/test_custom_int.py b/tests/_python_orig/test_custom_int.py
new file mode 100644
index 000000000..b75d366b8
--- /dev/null
+++ b/tests/_python_orig/test_custom_int.py
@@ -0,0 +1,17 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_custom_int_implicit_cast():
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+ x = ti.field(dtype=ci13)
+
+ ti.root.bit_struct(num_bits=32).place(x)
+
+ @ti.kernel
+ def foo():
+ x[None] = 10.3
+
+ foo()
+ assert x[None] == 10
diff --git a/tests/_python_orig/test_custom_struct.py b/tests/_python_orig/test_custom_struct.py
new file mode 100644
index 000000000..657e8f682
--- /dev/null
+++ b/tests/_python_orig/test_custom_struct.py
@@ -0,0 +1,357 @@
+import numpy as np
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_struct_member_access():
+ n = 32
+
+ x = ti.Struct.field({"a": ti.f32, "b": ti.f32}, shape=(n, ))
+ y = ti.Struct.field({"a": ti.f32, "b": ti.f32})
+
+ ti.root.dense(ti.i, n // 4).dense(ti.i, 4).place(y)
+
+ @ti.kernel
+ def init():
+ for i in x:
+ x[i].a = i
+ y[i].a = i
+
+ @ti.kernel
+ def run_taichi_scope():
+ for i in x:
+ x[i].b = x[i].a
+
+ def run_python_scope():
+ for i in range(n):
+ y[i].b = y[i].a * 2 + 1
+
+ init()
+ run_taichi_scope()
+ for i in range(n):
+ assert x[i].b == i
+ run_python_scope()
+ for i in range(n):
+ assert y[i].b == i * 2 + 1
+
+
+@test_utils.test()
+def test_struct_whole_access():
+ n = 32
+
+ # also tests implicit cast
+ x = ti.Struct.field({"a": ti.i32, "b": ti.f32}, shape=(n, ))
+ y = ti.Struct.field({"a": ti.f32, "b": ti.i32})
+
+ ti.root.dense(ti.i, n // 4).dense(ti.i, 4).place(y)
+
+ @ti.kernel
+ def init():
+ for i in x:
+ x[i] = ti.Struct(a=2 * i, b=1.01 * i)
+
+ @ti.kernel
+ def run_taichi_scope():
+ for i in x:
+ # element-wise ops only work in Taichi scope
+ y[i] = x[i] * 2 + 1
+
+ def run_python_scope():
+ for i in range(n):
+ y[i] = ti.Struct(a=x[i].a, b=int(x[i].b))
+
+ init()
+ for i in range(n):
+ assert x[i].a == 2 * i
+ assert x[i].b == approx(1.01 * i, rel=1e-4)
+ run_taichi_scope()
+ for i in range(n):
+ assert y[i].a == 4 * i + 1
+ assert y[i].b == int((1.01 * i) * 2 + 1)
+ run_python_scope()
+ for i in range(n):
+ assert y[i].a == 2 * i
+ assert y[i].b == int(1.01 * i)
+
+
+@test_utils.test()
+def test_struct_fill():
+ n = 32
+
+ # also tests implicit cast
+ x = ti.Struct.field({
+ "a": ti.f32,
+ "b": ti.types.vector(3, ti.i32)
+ },
+ shape=(n, ))
+
+ def fill_each():
+ x.a.fill(1.0)
+ x.b.fill(1.5)
+
+ def fill_all():
+ x.fill(2.5)
+
+ @ti.kernel
+ def fill_elements():
+ for i in x:
+ x[i].fill(i + 0.5)
+
+ fill_each()
+ for i in range(n):
+ assert x[i].a == 1.0
+ assert x[i].b[0] == 1 and x[i].b[1] == 1 and x[i].b[2] == 1
+ fill_all()
+ for i in range(n):
+ assert x[i].a == 2.5
+ assert x[i].b[0] == 2 and x[i].b[1] == 2 and x[i].b[2] == 2
+ fill_elements()
+ for i in range(n):
+ assert x[i].a == i + 0.5
+ assert np.allclose(x[i].b.to_numpy(), int(x[i].a))
+
+
+@test_utils.test()
+def test_matrix_type():
+ n = 32
+ vec2f = ti.types.vector(2, ti.f32)
+ vec3i = ti.types.vector(3, ti.i32)
+ x = vec3i.field()
+ ti.root.dense(ti.i, n).place(x)
+
+ @ti.kernel
+ def run_taichi_scope():
+ for i in x:
+ v = vec2f(i + 0.2)
+ # also tests implicit cast
+ x[i] = vec3i(v, i + 1.2)
+
+ def run_python_scope():
+ for i in range(n):
+ v = vec2f(i + 0.2)
+ x[i] = vec3i(i + 1.8, v)
+
+ run_taichi_scope()
+ for i in range(n):
+ assert np.allclose(x[i].to_numpy(), np.array([i, i, i + 1]))
+ run_python_scope()
+ for i in range(n):
+ assert np.allclose(x[i].to_numpy(), np.array([i + 1, i, i]))
+
+
+@test_utils.test()
+def test_struct_type():
+ n = 32
+ vec3f = ti.types.vector(3, float)
+ line3f = ti.types.struct(linedir=vec3f, length=float)
+ mystruct = ti.types.struct(line=line3f, idx=int)
+ x = mystruct.field(shape=(n, ))
+
+ @ti.kernel
+ def init_taichi_scope():
+ for i in x:
+ x[i] = mystruct(1)
+
+ def init_python_scope():
+ for i in range(n):
+ x[i] = mystruct(3)
+
+ @ti.kernel
+ def run_taichi_scope():
+ for i in x:
+ v = vec3f(1)
+ line = line3f(linedir=v, length=i + 0.5)
+ x[i] = mystruct(line=line, idx=i)
+
+ def run_python_scope():
+ for i in range(n):
+ v = vec3f(1)
+ x[i] = ti.Struct({
+ "line": {
+ "linedir": v,
+ "length": i + 0.5
+ },
+ "idx": i
+ })
+
+ init_taichi_scope()
+ for i in range(n):
+ assert x[i].idx == 1
+ assert np.allclose(x[i].line.linedir.to_numpy(), 1.0)
+ assert x[i].line.length == 1.0
+ run_taichi_scope()
+ for i in range(n):
+ assert x[i].idx == i
+ assert np.allclose(x[i].line.linedir.to_numpy(), 1.0)
+ assert x[i].line.length == i + 0.5
+ init_python_scope()
+ for i in range(n):
+ assert x[i].idx == 3
+ assert np.allclose(x[i].line.linedir.to_numpy(), 3.0)
+ assert x[i].line.length == 3.0
+ run_python_scope()
+ for i in range(n):
+ assert x[i].idx == i
+ assert np.allclose(x[i].line.linedir.to_numpy(), 1.0)
+ assert x[i].line.length == i + 0.5
+ x.fill(5)
+ for i in range(n):
+ assert x[i].idx == 5
+ assert np.allclose(x[i].line.linedir.to_numpy(), 5.0)
+ assert x[i].line.length == 5.0
+
+
+@test_utils.test()
+def test_struct_assign():
+ n = 32
+ vec3f = ti.types.vector(3, float)
+ line3f = ti.types.struct(linedir=vec3f, length=float)
+ mystruct = ti.types.struct(line=line3f, idx=int)
+ x = mystruct.field(shape=(n, ))
+ y = line3f.field(shape=(n, ))
+
+ @ti.kernel
+ def init():
+ for i in y:
+ y[i] = line3f(linedir=vec3f(1), length=i + 0.5)
+
+ @ti.kernel
+ def run_taichi_scope():
+ for i in x:
+ x[i].idx = i
+ x[i].line = y[i]
+
+ def run_python_scope():
+ for i in range(n):
+ x[i].idx = i
+ x[i].line = y[i]
+
+ init()
+ run_taichi_scope()
+ for i in range(n):
+ assert x[i].idx == i
+ assert np.allclose(x[i].line.linedir.to_numpy(), 1.0)
+ assert x[i].line.length == i + 0.5
+ x.fill(5)
+ run_python_scope()
+ for i in range(n):
+ assert x[i].idx == i
+ assert np.allclose(x[i].line.linedir.to_numpy(), 1.0)
+ assert x[i].line.length == i + 0.5
+
+
+@test_utils.test()
+def test_compound_type_implicit_cast():
+ vec2i = ti.types.vector(2, int)
+ vec2f = ti.types.vector(2, float)
+ structi = ti.types.struct(a=int, b=vec2i)
+ structf = ti.types.struct(a=float, b=vec2f)
+
+ @ti.kernel
+ def f2i_taichi_scope() -> int:
+ s = structi(2.5)
+ return s.a + s.b[0] + s.b[1]
+
+ def f2i_python_scope():
+ s = structi(2.5)
+ return s.a + s.b[0] + s.b[1]
+
+ @ti.kernel
+ def i2f_taichi_scope() -> float:
+ s = structf(2)
+ return s.a + s.b[0] + s.b[1]
+
+ def i2f_python_scope():
+ s = structf(2)
+ return s.a + s.b[0] + s.b[1]
+
+ int_value = f2i_taichi_scope()
+ assert type(int_value) == int and int_value == 6
+ int_value = f2i_python_scope()
+ assert type(int_value) == int and int_value == 6
+ float_value = i2f_taichi_scope()
+ assert type(float_value) == float and float_value == approx(6.0, rel=1e-4)
+ float_value = i2f_python_scope()
+ assert type(float_value) == float and float_value == approx(6.0, rel=1e-4)
+
+
+@test_utils.test()
+def test_local_struct_assign():
+ n = 32
+ vec3f = ti.types.vector(3, float)
+ line3f = ti.types.struct(linedir=vec3f, length=float)
+ mystruct = ti.types.struct(line=line3f, idx=int)
+
+ @ti.kernel
+ def run_taichi_scope():
+ y = line3f(0)
+ x = mystruct(0)
+ x.idx = 0
+ x.line = y
+
+ def run_python_scope():
+ y = line3f(0)
+ x = mystruct(0)
+ x.idx = 0
+ x.line = y
+
+ run_taichi_scope()
+ run_python_scope()
+
+
+@test_utils.test(debug=True)
+def test_copy_python_scope_struct_to_taichi_scope():
+ a = ti.Struct({'a': 2, 'b': 3})
+
+ @ti.kernel
+ def test():
+ b = a
+ assert b.a == 2
+ assert b.b == 3
+ b = ti.Struct({'a': 3, 'b': 4})
+ assert b.a == 3
+ assert b.b == 4
+
+ test()
+
+
+@test_utils.test(debug=True)
+def test_copy_struct_field_element_to_taichi_scope():
+ a = ti.Struct.field({'a': ti.i32, 'b': ti.i32}, shape=())
+ a[None].a = 2
+ a[None].b = 3
+
+ @ti.kernel
+ def test():
+ b = a[None]
+ assert b.a == 2
+ assert b.b == 3
+ b.a = 5
+ b.b = 9
+ assert b.a == 5
+ assert b.b == 9
+ assert a[None].a == 2
+ assert a[None].b == 3
+
+ test()
+
+
+@test_utils.test(debug=True)
+def test_copy_struct_in_taichi_scope():
+ @ti.kernel
+ def test():
+ a = ti.Struct({'a': 2, 'b': 3})
+ b = a
+ assert b.a == 2
+ assert b.b == 3
+ b.a = 5
+ b.b = 9
+ assert b.a == 5
+ assert b.b == 9
+ assert a.a == 2
+ assert a.b == 3
+
+ test()
diff --git a/tests/_python_orig/test_custom_type_atomics.py b/tests/_python_orig/test_custom_type_atomics.py
new file mode 100644
index 000000000..1e810963b
--- /dev/null
+++ b/tests/_python_orig/test_custom_type_atomics.py
@@ -0,0 +1,93 @@
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+# TODO: remove excluding of ti.metal.
+@test_utils.test(require=ti.extension.quant_basic,
+ exclude=[ti.metal],
+ debug=True)
+def test_custom_int_atomics():
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+ ci5 = ti.types.quantized_types.quant.int(5, True)
+ cu2 = ti.types.quantized_types.quant.int(2, False)
+
+ x = ti.field(dtype=ci13)
+ y = ti.field(dtype=ci5)
+ z = ti.field(dtype=cu2)
+
+ ti.root.bit_struct(num_bits=32).place(x, y, z)
+
+ x[None] = 3
+ y[None] = 2
+ z[None] = 0
+
+ @ti.kernel
+ def foo():
+ for i in range(10):
+ x[None] += 4
+
+ for j in range(5):
+ y[None] -= 1
+
+ for k in range(3):
+ z[None] += 1
+
+ foo()
+
+ assert x[None] == 43
+ assert y[None] == -3
+ assert z[None] == 3
+
+
+@test_utils.test(require=[ti.extension.quant_basic, ti.extension.data64],
+ debug=True)
+def test_custom_int_atomics_b64():
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+
+ x = ti.field(dtype=ci13)
+
+ ti.root.bit_array(ti.i, 4, num_bits=64).place(x)
+
+ x[0] = 100
+ x[1] = 200
+ x[2] = 300
+
+ @ti.kernel
+ def foo():
+ for i in range(9):
+ x[i % 3] += i
+
+ foo()
+
+ assert x[0] == 109
+ assert x[1] == 212
+ assert x[2] == 315
+
+
+@test_utils.test(require=ti.extension.quant_basic, debug=True)
+def test_custom_float_atomics():
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+ ci19 = ti.types.quantized_types.quant.int(19, False)
+ cft13 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=ci13, scale=0.1)
+ cft19 = ti.types.quantized_types.type_factory.custom_float(
+ significand_type=ci19, scale=0.1)
+
+ x = ti.field(dtype=cft13)
+ y = ti.field(dtype=cft19)
+
+ ti.root.bit_struct(num_bits=32).place(x, y)
+
+ @ti.kernel
+ def foo():
+ x[None] = 0.7
+ y[None] = 123.4
+ for _ in range(10):
+ x[None] -= 0.4
+ y[None] += 100.1
+
+ foo()
+ assert x[None] == approx(-3.3)
+ assert y[None] == approx(1124.4)
diff --git a/tests/_python_orig/test_customized_grad.py b/tests/_python_orig/test_customized_grad.py
new file mode 100644
index 000000000..d813c6a0e
--- /dev/null
+++ b/tests/_python_orig/test_customized_grad.py
@@ -0,0 +1,227 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_customized_kernels_tape():
+ x = ti.field(ti.f32)
+ total = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(total)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(mul: ti.f32):
+ for i in range(n):
+ ti.atomic_add(total[None], x[i] * mul)
+
+ @ti.ad.grad_replaced
+ def forward(mul):
+ func(mul)
+ func(mul)
+
+ @ti.ad.grad_for(forward)
+ def backward(mul):
+ func.grad(mul)
+
+ with ti.Tape(loss=total):
+ forward(4)
+ assert x.grad[0] == 4
+
+
+@test_utils.test()
+def test_customized_kernels_grad():
+ x = ti.field(ti.f32)
+ total = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(total)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(mul: ti.f32):
+ for i in range(n):
+ ti.atomic_add(total[None], x[i] * mul)
+
+ @ti.ad.grad_replaced
+ def forward(mul):
+ func(mul)
+ func(mul)
+
+ @ti.ad.grad_for(forward)
+ def backward(mul):
+ func.grad(mul)
+
+ total.grad[None] = 1
+ forward(4)
+ forward.grad(4)
+ assert x.grad[0] == 4
+
+
+@test_utils.test()
+def test_customized_kernels_indirect():
+ x = ti.field(ti.f32)
+ total = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(total)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(mul: ti.f32):
+ for i in range(n):
+ ti.atomic_add(total[None], x[i] * mul)
+
+ def func_proxy(mul):
+ func(mul)
+
+ @ti.ad.grad_replaced
+ def forward(mul):
+ func_proxy(mul)
+ func_proxy(mul)
+
+ @ti.ad.grad_for(forward)
+ def backward(mul):
+ func.grad(mul)
+
+ with ti.Tape(loss=total):
+ forward(4)
+ assert x.grad[0] == 4
+
+
+@test_utils.test()
+def test_customized_kernels_oop():
+ @ti.data_oriented
+ class A:
+ def __init__(self):
+ self.x = ti.field(ti.f32)
+ self.total = ti.field(ti.f32)
+ self.n = 128
+
+ ti.root.dense(ti.i, self.n).place(self.x)
+ ti.root.place(self.total)
+
+ @ti.kernel
+ def func(self, mul: ti.f32):
+ for i in range(self.n):
+ ti.atomic_add(self.total[None], self.x[i] * mul)
+
+ @ti.ad.grad_replaced
+ def forward(self, mul):
+ self.func(mul)
+ self.func(mul)
+
+ @ti.ad.grad_for(forward)
+ def backward(self, mul):
+ self.func.grad(mul)
+
+ a = A()
+
+ ti.root.lazy_grad()
+
+ with ti.Tape(loss=a.total):
+ a.forward(4)
+ assert a.x.grad[0] == 4
+
+
+@test_utils.test()
+def test_customized_kernels_oop2():
+ @ti.data_oriented
+ class A:
+ def __init__(self):
+ self.x = ti.field(ti.f32)
+ self.total = ti.field(ti.f32)
+ self.n = 128
+
+ ti.root.dense(ti.i, self.n).place(self.x)
+ ti.root.place(self.total)
+
+ @ti.kernel
+ def func(self, mul: ti.f32):
+ for i in range(self.n):
+ ti.atomic_add(self.total[None], self.x[i] * mul)
+
+ def func_proxy(self, mul):
+ self.func(mul)
+
+ @ti.ad.grad_replaced
+ def forward(self, mul):
+ self.func_proxy(mul)
+ self.func_proxy(mul)
+
+ @ti.ad.grad_for(forward)
+ def backward(self, mul):
+ self.func.grad(mul)
+
+ a = A()
+
+ ti.root.lazy_grad()
+
+ with ti.Tape(loss=a.total):
+ a.forward(4)
+ assert a.x.grad[0] == 4
+
+
+@test_utils.test()
+def test_decorated_primal_is_taichi_kernel():
+ x = ti.field(ti.f32)
+ total = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(total)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(mul: ti.f32):
+ for i in range(n):
+ ti.atomic_add(total[None], x[i] * mul)
+
+ with pytest.raises(RuntimeError):
+
+ @ti.ad.grad_for(func)
+ def backward(mul):
+ func.grad(mul)
+
+ with ti.Tape(loss=total):
+ func(4)
+
+
+@test_utils.test()
+def test_decorated_primal_missing_decorator():
+ x = ti.field(ti.f32)
+ total = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(total)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func(mul: ti.f32):
+ for i in range(n):
+ ti.atomic_add(total[None], x[i] * mul)
+
+ def foward(mul):
+ func(mul)
+ func(mul)
+
+ with pytest.raises(RuntimeError):
+
+ @ti.ad.grad_for(func)
+ def backward(mul):
+ func.grad(mul)
+
+ with ti.Tape(loss=total):
+ func(4)
diff --git a/tests/_python_orig/test_debug.py b/tests/_python_orig/test_debug.py
new file mode 100644
index 000000000..e60abf74e
--- /dev/null
+++ b/tests/_python_orig/test_debug.py
@@ -0,0 +1,118 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+def test_cpu_debug_snode_reader():
+ ti.init(arch=ti.x64, debug=True)
+
+ x = ti.field(ti.f32, shape=())
+ x[None] = 10.0
+
+ assert x[None] == 10.0
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_cpu_debug_snode_writer_out_of_bound():
+ x = ti.field(ti.f32, shape=3)
+
+ with pytest.raises(RuntimeError):
+ x[3] = 10.0
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_cpu_debug_snode_writer_out_of_bound_negative():
+ x = ti.field(ti.f32, shape=3)
+ with pytest.raises(RuntimeError):
+ x[-1] = 10.0
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_cpu_debug_snode_reader_out_of_bound():
+ x = ti.field(ti.f32, shape=3)
+
+ with pytest.raises(RuntimeError):
+ a = x[3]
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_cpu_debug_snode_reader_out_of_bound_negative():
+ x = ti.field(ti.f32, shape=3)
+ with pytest.raises(RuntimeError):
+ a = x[-1]
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_out_of_bound():
+ x = ti.field(ti.i32, shape=(8, 16))
+
+ @ti.kernel
+ def func():
+ x[3, 16] = 1
+
+ with pytest.raises(RuntimeError):
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_not_out_of_bound():
+ x = ti.field(ti.i32, shape=(8, 16))
+
+ @ti.kernel
+ def func():
+ x[7, 15] = 1
+
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_out_of_bound_dynamic():
+ x = ti.field(ti.i32)
+
+ ti.root.dynamic(ti.i, 16, 4).place(x)
+
+ @ti.kernel
+ def func():
+ x[17] = 1
+
+ with pytest.raises(RuntimeError):
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_not_out_of_bound_dynamic():
+ x = ti.field(ti.i32)
+
+ ti.root.dynamic(ti.i, 16, 4).place(x)
+
+ @ti.kernel
+ def func():
+ x[3] = 1
+
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_out_of_bound_with_offset():
+ x = ti.field(ti.i32, shape=(8, 16), offset=(-8, -8))
+
+ @ti.kernel
+ def func():
+ x[0, 0] = 1
+
+ with pytest.raises(RuntimeError):
+ func()
+ func()
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, gdb_trigger=False)
+def test_not_out_of_bound_with_offset():
+ x = ti.field(ti.i32, shape=(8, 16), offset=(-4, -8))
+
+ @ti.kernel
+ def func():
+ x[-4, -8] = 1
+ x[3, 7] = 2
+
+ func()
diff --git a/tests/_python_orig/test_div.py b/tests/_python_orig/test_div.py
new file mode 100644
index 000000000..0da0c2410
--- /dev/null
+++ b/tests/_python_orig/test_div.py
@@ -0,0 +1,88 @@
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def _test_floor_div(arg1, a, arg2, b, arg3, c):
+ z = ti.field(arg3, shape=())
+
+ @ti.kernel
+ def func(x: arg1, y: arg2):
+ z[None] = x // y
+
+ func(a, b)
+ assert z[None] == c
+
+
+@test_utils.test()
+def _test_true_div(arg1, a, arg2, b, arg3, c):
+ z = ti.field(arg3, shape=())
+
+ @ti.kernel
+ def func(x: arg1, y: arg2):
+ z[None] = x / y
+
+ func(a, b)
+ assert z[None] == c
+
+
+def test_floor_div():
+ _test_floor_div(ti.i32, 10, ti.i32, 3, ti.f32, 3)
+ _test_floor_div(ti.f32, 10, ti.f32, 3, ti.f32, 3)
+ _test_floor_div(ti.i32, 10, ti.f32, 3, ti.f32, 3)
+ _test_floor_div(ti.f32, 10, ti.i32, 3, ti.f32, 3)
+
+ _test_floor_div(ti.i32, -10, ti.i32, 3, ti.f32, -4)
+ _test_floor_div(ti.f32, -10, ti.f32, 3, ti.f32, -4)
+ _test_floor_div(ti.i32, -10, ti.f32, 3, ti.f32, -4)
+ _test_floor_div(ti.f32, -10, ti.i32, 3, ti.f32, -4)
+
+ _test_floor_div(ti.i32, 10, ti.i32, -3, ti.f32, -4)
+ _test_floor_div(ti.f32, 10, ti.f32, -3, ti.f32, -4)
+ _test_floor_div(ti.i32, 10, ti.f32, -3, ti.f32, -4)
+ _test_floor_div(ti.f32, 10, ti.i32, -3, ti.f32, -4)
+
+
+def test_true_div():
+ _test_true_div(ti.i32, 3, ti.i32, 2, ti.f32, 1.5)
+ _test_true_div(ti.f32, 3, ti.f32, 2, ti.f32, 1.5)
+ _test_true_div(ti.i32, 3, ti.f32, 2, ti.f32, 1.5)
+ _test_true_div(ti.f32, 3, ti.i32, 2, ti.f32, 1.5)
+ _test_true_div(ti.f32, 3, ti.i32, 2, ti.i32, 1)
+
+ _test_true_div(ti.i32, -3, ti.i32, 2, ti.f32, -1.5)
+ _test_true_div(ti.f32, -3, ti.f32, 2, ti.f32, -1.5)
+ _test_true_div(ti.i32, -3, ti.f32, 2, ti.f32, -1.5)
+ _test_true_div(ti.f32, -3, ti.i32, 2, ti.f32, -1.5)
+ _test_true_div(ti.f32, -3, ti.i32, 2, ti.i32, -1)
+
+
+@test_utils.test()
+def test_div_default_ip():
+ impl.get_runtime().set_default_ip(ti.i64)
+ z = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 1e15 + 1e9
+ z[None] = a // 1e10
+
+ func()
+ assert z[None] == 100000
+
+
+@test_utils.test()
+def test_floor_div_pythonic():
+ z = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func(x: ti.i32, y: ti.i32):
+ z[None] = x // y
+
+ for i in range(-10, 11):
+ for j in range(-10, 11):
+ if j != 0:
+ func(i, j)
+ assert z[None] == i // j
diff --git a/tests/_python_orig/test_dynamic.py b/tests/_python_orig/test_dynamic.py
new file mode 100644
index 000000000..a443a4d78
--- /dev/null
+++ b/tests/_python_orig/test_dynamic.py
@@ -0,0 +1,212 @@
+import pytest
+from taichi.lang.misc import serialize
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dynamic():
+ x = ti.field(ti.f32)
+ n = 128
+
+ ti.root.dynamic(ti.i, n, 32).place(x)
+
+ @ti.kernel
+ def func():
+ pass
+
+ for i in range(n):
+ x[i] = i
+
+ for i in range(n):
+ assert x[i] == i
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dynamic2():
+ x = ti.field(ti.f32)
+ n = 128
+
+ ti.root.dynamic(ti.i, n, 32).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ x[i] = i
+
+ func()
+
+ for i in range(n):
+ assert x[i] == i
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dynamic_matrix():
+ x = ti.Matrix.field(2, 1, dtype=ti.i32)
+ n = 8192
+
+ ti.root.dynamic(ti.i, n, chunk_size=128).place(x)
+
+ @ti.kernel
+ def func():
+ serialize()
+ for i in range(n // 4):
+ x[i * 4][1, 0] = i
+
+ func()
+
+ for i in range(n // 4):
+ a = x[i * 4][1, 0]
+ assert a == i
+ if i + 1 < n // 4:
+ b = x[i * 4 + 1][1, 0]
+ assert b == 0
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_append():
+ x = ti.field(ti.i32)
+ n = 128
+
+ ti.root.dynamic(ti.i, n, 32).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ ti.append(x.parent(), [], i)
+
+ func()
+
+ elements = []
+ for i in range(n):
+ elements.append(x[i])
+ elements.sort()
+ for i in range(n):
+ assert elements[i] == i
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_length():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.f32, shape=())
+ n = 128
+
+ ti.root.dynamic(ti.i, n, 32).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ ti.append(x.parent(), [], i)
+
+ func()
+
+ @ti.kernel
+ def get_len():
+ y[None] = ti.length(x.parent(), [])
+
+ get_len()
+
+ assert y[None] == n
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_append_ret_value():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ z = ti.field(ti.i32)
+ n = 128
+
+ ti.root.dynamic(ti.i, n, 32).place(x)
+ ti.root.dynamic(ti.i, n, 32).place(y)
+ ti.root.dynamic(ti.i, n, 32).place(z)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ u = ti.append(x.parent(), [], i)
+ y[u] = i + 1
+ z[u] = i + 3
+
+ func()
+
+ for i in range(n):
+ assert x[i] + 1 == y[i]
+ assert x[i] + 3 == z[i]
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dense_dynamic():
+ # The spin lock implementation has triggered a bug in CUDA, the end result
+ # being that appending to Taichi's dynamic node messes up its length. See
+ # https://stackoverflow.com/questions/65995357/cuda-spinlock-implementation-with-independent-thread-scheduling-supported
+ # CUDA 11.2 didn't fix this bug, unfortunately.
+ if ti.lang.impl.current_cfg().arch == ti.cuda:
+ pytest.skip('CUDA spinlock bug')
+
+ n = 128
+ x = ti.field(ti.i32)
+ l = ti.field(ti.i32, shape=n)
+
+ ti.root.dense(ti.i, n).dynamic(ti.j, n, 8).place(x)
+
+ @ti.kernel
+ def func():
+ serialize()
+ for i in range(n):
+ for j in range(n):
+ ti.append(x.parent(), j, i)
+
+ for i in range(n):
+ l[i] = ti.length(x.parent(), i)
+
+ func()
+
+ for i in range(n):
+ assert l[i] == n
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dense_dynamic_len():
+ n = 128
+ x = ti.field(ti.i32)
+ l = ti.field(ti.i32, shape=n)
+
+ ti.root.dense(ti.i, n).dynamic(ti.j, n, 32).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ l[i] = ti.length(x.parent(), i)
+
+ func()
+
+ for i in range(n):
+ assert l[i] == 0
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dynamic_activate():
+ # record the lengths
+ l = ti.field(ti.i32, 3)
+ x = ti.field(ti.i32)
+ xp = ti.root.dynamic(ti.i, 32, 32)
+ xp.place(x)
+
+ m = 5
+
+ @ti.kernel
+ def func():
+ for i in range(m):
+ ti.append(xp, [], i)
+ l[0] = ti.length(xp, [])
+ x[20] = 42
+ l[1] = ti.length(xp, [])
+ x[10] = 43
+ l[2] = ti.length(xp, [])
+
+ func()
+ l = l.to_numpy()
+ assert l[0] == m
+ assert l[1] == 21
+ assert l[2] == 21
diff --git a/tests/_python_orig/test_eig.py b/tests/_python_orig/test_eig.py
new file mode 100644
index 000000000..f0e883f01
--- /dev/null
+++ b/tests/_python_orig/test_eig.py
@@ -0,0 +1,136 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+def _eigen_vector_equal(v1, v2, tol):
+ if np.linalg.norm(v1) == 0.0:
+ assert np.linalg.norm(v2) == 0.0
+ else:
+ v1 = v1 / np.linalg.norm(v1)
+ v2 = v2 / np.linalg.norm(v2)
+ try:
+ np.testing.assert_allclose(v1, v2, atol=tol, rtol=tol)
+ except AssertionError:
+ assert np.allclose(v1, -v2, atol=tol, rtol=tol) or np.allclose(
+ v1, 1.j * v2, atol=tol, rtol=tol) or np.allclose(
+ v1, -1.j * v2, atol=tol, rtol=tol)
+
+
+def _test_eig2x2_real(dt):
+ A = ti.Matrix.field(2, 2, dtype=dt, shape=())
+ v = ti.Matrix.field(2, 2, dtype=dt, shape=())
+ w = ti.Matrix.field(4, 2, dtype=dt, shape=())
+
+ A[None] = [[1, 1], [2, 3]]
+
+ @ti.kernel
+ def eigen_solve():
+ v[None], w[None] = ti.eig(A[None])
+
+ tol = 1e-5 if dt == ti.f32 else 1e-12
+ dtype = np.float32 if dt == ti.f32 else np.float64
+
+ eigen_solve()
+ v_np, w_np = np.linalg.eig(A.to_numpy().astype(dtype))
+ v_ti = v.to_numpy()[:, 0].astype(dtype)
+ w_ti = w.to_numpy()[0::2, :].astype(dtype)
+
+ # sort by eigenvalues
+ idx_np = np.argsort(v_np)
+ idx_ti = np.argsort(v_ti)
+
+ np.testing.assert_allclose(v_ti[idx_ti], v_np[idx_np], atol=tol, rtol=tol)
+ _eigen_vector_equal(w_ti[:, idx_ti[0]], w_np[:, idx_np[0]], tol)
+ _eigen_vector_equal(w_ti[:, idx_ti[1]], w_np[:, idx_np[1]], tol)
+
+
+def _test_eig2x2_complex(dt):
+ A = ti.Matrix.field(2, 2, dtype=dt, shape=())
+ v = ti.Matrix.field(2, 2, dtype=dt, shape=())
+ w = ti.Matrix.field(4, 2, dtype=dt, shape=())
+
+ A[None] = [[1, -1], [1, 1]]
+
+ @ti.kernel
+ def eigen_solve():
+ v[None], w[None] = ti.eig(A[None])
+
+ tol = 1e-5 if dt == ti.f32 else 1e-12
+ dtype = np.float32 if dt == ti.f32 else np.float64
+
+ eigen_solve()
+ v_np, w_np = np.linalg.eig(A.to_numpy().astype(dtype))
+ v_ti = v.to_numpy().astype(dtype)
+ w_ti = w.to_numpy().astype(dtype)
+ v_ti_complex = v_ti[:, 0] + v_ti[:, 1] * 1.j
+ w_ti_complex = w_ti[0::2, :] + w_ti[1::2, :] * 1.j
+
+ # sort by eigenvalues
+ idx_np = np.argsort(v_np)
+ idx_ti = np.argsort(v_ti_complex)
+
+ np.testing.assert_allclose(v_ti_complex[idx_ti],
+ v_np[idx_np],
+ atol=tol,
+ rtol=tol)
+ _eigen_vector_equal(w_ti_complex[:, idx_ti[0]], w_np[:, idx_np[0]], tol)
+ _eigen_vector_equal(w_ti_complex[:, idx_ti[1]], w_np[:, idx_np[1]], tol)
+
+
+def _test_sym_eig2x2(dt):
+ A = ti.Matrix.field(2, 2, dtype=dt, shape=())
+ v = ti.Vector.field(2, dtype=dt, shape=())
+ w = ti.Matrix.field(2, 2, dtype=dt, shape=())
+
+ A[None] = [[5, 3], [3, 2]]
+
+ @ti.kernel
+ def eigen_solve():
+ v[None], w[None] = ti.sym_eig(A[None])
+
+ tol = 1e-5 if dt == ti.f32 else 1e-12
+ dtype = np.float32 if dt == ti.f32 else np.float64
+
+ eigen_solve()
+ v_np, w_np = np.linalg.eig(A.to_numpy().astype(dtype))
+ v_ti = v.to_numpy().astype(dtype)
+ w_ti = w.to_numpy().astype(dtype)
+
+ # sort by eigenvalues
+ idx_np = np.argsort(v_np)
+ idx_ti = np.argsort(v_ti)
+
+ np.testing.assert_allclose(v_ti[idx_ti], v_np[idx_np], atol=tol, rtol=tol)
+ _eigen_vector_equal(w_ti[:, idx_ti[0]], w_np[:, idx_np[0]], tol)
+ _eigen_vector_equal(w_ti[:, idx_ti[1]], w_np[:, idx_np[1]], tol)
+
+
+def test_eig2x2():
+ for func in [_test_eig2x2_real, _test_eig2x2_complex]:
+ for fp in [ti.f32, ti.f64]:
+
+ @test_utils.test(
+ require=ti.extension.data64 if fp == ti.f64 else [],
+ default_fp=fp,
+ fast_math=False)
+ def wrapped():
+ func(fp)
+
+ wrapped()
+
+
+def test_sym_eig2x2():
+ for func in [_test_sym_eig2x2]:
+ for fp in [ti.f32, ti.f64]:
+
+ @test_utils.test(
+ require=ti.extension.data64 if fp == ti.f64 else [],
+ default_fp=fp,
+ fast_math=False)
+ def wrapped():
+ func(fp)
+
+ wrapped()
diff --git a/tests/_python_orig/test_element_wise.py b/tests/_python_orig/test_element_wise.py
new file mode 100644
index 000000000..59354c96e
--- /dev/null
+++ b/tests/_python_orig/test_element_wise.py
@@ -0,0 +1,346 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+def _c_mod(a, b):
+ return a - b * int(float(a) / b)
+
+
@pytest.mark.parametrize('lhs_is_mat,rhs_is_mat', [(True, True), (True, False),
                                                   (False, True)])
@test_utils.test(fast_math=False, exclude=[ti.vulkan])
def test_binary_f(lhs_is_mat, rhs_is_mat):
    # Element-wise binary float ops on matrices, with each operand being
    # either a matrix field or a scalar field (matrix/scalar broadcasting).
    x = ti.Matrix.field(3, 2, ti.f32, 16)
    if lhs_is_mat:
        y = ti.Matrix.field(3, 2, ti.f32, ())
    else:
        y = ti.field(ti.f32, ())
    if rhs_is_mat:
        z = ti.Matrix.field(3, 2, ti.f32, ())
    else:
        z = ti.field(ti.f32, ())

    if lhs_is_mat:
        y.from_numpy(np.array([[0, 2], [9, 3.1], [7, 4]], np.float32))
    else:
        y[None] = 6.1
    if rhs_is_mat:
        z.from_numpy(np.array([[4, 5], [6, 3], [9, 2]], np.float32))
    else:
        z[None] = 5

    @ti.kernel
    def func():
        # One result slot per operator under test (16 in total).
        x[0] = y[None] + z[None]
        x[1] = y[None] - z[None]
        x[2] = y[None] * z[None]
        x[3] = y[None] / z[None]
        x[4] = y[None] // z[None]
        x[5] = y[None] % z[None]
        x[6] = y[None]**z[None]
        x[7] = y[None] == z[None]
        x[8] = y[None] != z[None]
        x[9] = y[None] > z[None]
        x[10] = y[None] >= z[None]
        x[11] = y[None] < z[None]
        x[12] = y[None] <= z[None]
        x[13] = ti.atan2(y[None], z[None])
        x[14] = ti.min(y[None], z[None])
        x[15] = ti.max(y[None], z[None])

    func()
    # Compare each slot against numpy's broadcasting semantics.
    x = x.to_numpy()
    y = y.to_numpy()
    z = z.to_numpy()
    assert test_utils.allclose(x[0], y + z)
    assert test_utils.allclose(x[1], y - z)
    assert test_utils.allclose(x[2], y * z)
    assert test_utils.allclose(x[3], y / z)
    assert test_utils.allclose(x[4], y // z)
    assert test_utils.allclose(x[5], y % z)
    assert test_utils.allclose(x[6], y**z)
    assert test_utils.allclose(x[7], y == z)
    assert test_utils.allclose(x[8], y != z)
    assert test_utils.allclose(x[9], y > z)
    assert test_utils.allclose(x[10], y >= z)
    assert test_utils.allclose(x[11], y < z)
    assert test_utils.allclose(x[12], y <= z)
    assert test_utils.allclose(x[13], np.arctan2(y, z))
    assert test_utils.allclose(x[14], np.minimum(y, z))
    assert test_utils.allclose(x[15], np.maximum(y, z))
+
+
@pytest.mark.parametrize('is_mat', [(True, True), (True, False),
                                    (False, True)])
@test_utils.test()
def test_binary_i(is_mat):
    # Element-wise binary integer ops on matrices; the (False, False) combo
    # is omitted since scalar-scalar does not exercise matrix code paths.
    lhs_is_mat, rhs_is_mat = is_mat

    x = ti.Matrix.field(3, 2, ti.i32, 20)
    if lhs_is_mat:
        y = ti.Matrix.field(3, 2, ti.i32, ())
    else:
        y = ti.field(ti.i32, ())
    if rhs_is_mat:
        z = ti.Matrix.field(3, 2, ti.i32, ())
    else:
        z = ti.field(ti.i32, ())

    if lhs_is_mat:
        y.from_numpy(np.array([[0, 2], [9, 3], [7, 4]], np.int32))
    else:
        y[None] = 6
    if rhs_is_mat:
        z.from_numpy(np.array([[4, 5], [6, 3], [9, 2]], np.int32))
    else:
        z[None] = 5

    @ti.kernel
    def func():
        # One result slot per operator under test (20 in total); raw_div and
        # raw_mod are C-style (truncating) variants.
        x[0] = y[None] + z[None]
        x[1] = y[None] - z[None]
        x[2] = y[None] * z[None]
        x[3] = y[None] // z[None]
        x[4] = ti.raw_div(y[None], z[None])
        x[5] = y[None] % z[None]
        x[6] = ti.raw_mod(y[None], z[None])
        x[7] = y[None]**z[None]
        x[8] = y[None] == z[None]
        x[9] = y[None] != z[None]
        x[10] = y[None] > z[None]
        x[11] = y[None] >= z[None]
        x[12] = y[None] < z[None]
        x[13] = y[None] <= z[None]
        x[14] = y[None] & z[None]
        x[15] = y[None] ^ z[None]
        x[16] = y[None] | z[None]
        x[17] = ti.min(y[None], z[None])
        x[18] = ti.max(y[None], z[None])
        x[19] = y[None] << z[None]

    func()
    # All test operands are non-negative, so raw_div/raw_mod agree with
    # numpy's floor-based // and % here.
    x = x.to_numpy()
    y = y.to_numpy()
    z = z.to_numpy()
    assert test_utils.allclose(x[0], y + z)
    assert test_utils.allclose(x[1], y - z)
    assert test_utils.allclose(x[2], y * z)
    assert test_utils.allclose(x[3], y // z)
    assert test_utils.allclose(x[4], y // z)
    assert test_utils.allclose(x[5], y % z)
    assert test_utils.allclose(x[6], y % z)
    assert test_utils.allclose(x[7], y**z, rel=1e-5)
    assert test_utils.allclose(x[8], y == z)
    assert test_utils.allclose(x[9], y != z)
    assert test_utils.allclose(x[10], y > z)
    assert test_utils.allclose(x[11], y >= z)
    assert test_utils.allclose(x[12], y < z)
    assert test_utils.allclose(x[13], y <= z)
    assert test_utils.allclose(x[14], y & z)
    assert test_utils.allclose(x[15], y ^ z)
    assert test_utils.allclose(x[16], y | z)
    assert test_utils.allclose(x[17], np.minimum(y, z))
    assert test_utils.allclose(x[18], np.maximum(y, z))
    assert test_utils.allclose(x[19], y << z)
+
+
@pytest.mark.parametrize('rhs_is_mat', [True, False])
@test_utils.test(fast_math=False)
def test_writeback_binary_f(rhs_is_mat):
    # Augmented-assignment (+=, -=, ...) and atomic min/max on an f32 matrix
    # field, with the rhs either a matrix field or a scalar field.
    x = ti.Matrix.field(3, 2, ti.f32, 9)
    y = ti.Matrix.field(3, 2, ti.f32, ())
    if rhs_is_mat:
        z = ti.Matrix.field(3, 2, ti.f32, ())
    else:
        z = ti.field(ti.f32, ())

    y.from_numpy(np.array([[0, 2], [9, 3.1], [7, 4]], np.float32))
    if rhs_is_mat:
        z.from_numpy(np.array([[4, 5], [6, 3], [9, 2]], np.float32))
    else:
        z[None] = 5

    @ti.kernel
    def func():
        # Seed every slot with y, then apply one writeback op per slot.
        for i in x:
            x[i] = y[None]
        # Plain assignment needs fill() when the rhs is a scalar.
        if ti.static(rhs_is_mat):
            x[0] = z[None]
        else:
            x[0].fill(z[None])
        x[1] += z[None]
        x[2] -= z[None]
        x[3] *= z[None]
        x[4] /= z[None]
        x[5] //= z[None]
        x[6] %= z[None]
        ti.atomic_min(x[7], z[None])
        ti.atomic_max(x[8], z[None])

    func()
    # x[0] (plain assignment) is not asserted; slots 1-8 are checked.
    x = x.to_numpy()
    y = y.to_numpy()
    z = z.to_numpy()
    assert test_utils.allclose(x[1], y + z)
    assert test_utils.allclose(x[2], y - z)
    assert test_utils.allclose(x[3], y * z)
    assert test_utils.allclose(x[4], y / z)
    assert test_utils.allclose(x[5], y // z)
    assert test_utils.allclose(x[6], y % z)
    assert test_utils.allclose(x[7], np.minimum(y, z))
    assert test_utils.allclose(x[8], np.maximum(y, z))
+
+
# Fix: the parametrize list was [(True, True), (True, False)] — tuples are
# always truthy, so `rhs_is_mat` never took the scalar path. Use plain
# booleans, mirroring test_writeback_binary_f above.
@pytest.mark.parametrize('rhs_is_mat', [True, False])
@test_utils.test()
def test_writeback_binary_i(rhs_is_mat):
    # Augmented-assignment (+=, //=, &=, ...) and atomic min/max on an i32
    # matrix field, with the rhs either a matrix field or a scalar field.
    x = ti.Matrix.field(3, 2, ti.i32, 12)
    y = ti.Matrix.field(3, 2, ti.i32, ())
    if rhs_is_mat:
        z = ti.Matrix.field(3, 2, ti.i32, ())
    else:
        z = ti.field(ti.i32, ())

    y.from_numpy(np.array([[0, 2], [9, 3], [7, 4]], np.int32))
    if rhs_is_mat:
        z.from_numpy(np.array([[4, 5], [6, 3], [9, 2]], np.int32))
    else:
        z[None] = 5

    @ti.kernel
    def func():
        # Seed every slot with y, then apply one writeback op per slot
        # (slot 9 intentionally unused).
        for i in x:
            x[i] = y[None]
        # Plain assignment needs fill() when the rhs is a scalar, same as
        # the f32 variant of this test.
        if ti.static(rhs_is_mat):
            x[0] = z[None]
        else:
            x[0].fill(z[None])
        x[1] += z[None]
        x[2] -= z[None]
        x[3] *= z[None]
        x[4] //= z[None]
        x[5] %= z[None]
        x[6] &= z[None]
        x[7] |= z[None]
        x[8] ^= z[None]
        ti.atomic_min(x[10], z[None])
        ti.atomic_max(x[11], z[None])

    func()
    # x[0] (plain assignment) is not asserted; remaining slots are checked.
    x = x.to_numpy()
    y = y.to_numpy()
    z = z.to_numpy()
    assert test_utils.allclose(x[1], y + z)
    assert test_utils.allclose(x[2], y - z)
    assert test_utils.allclose(x[3], y * z)
    assert test_utils.allclose(x[4], y // z)
    assert test_utils.allclose(x[5], y % z)
    assert test_utils.allclose(x[6], y & z)
    assert test_utils.allclose(x[7], y | z)
    assert test_utils.allclose(x[8], y ^ z)
    assert test_utils.allclose(x[10], np.minimum(y, z))
    assert test_utils.allclose(x[11], np.maximum(y, z))
+
+
@test_utils.test()
def test_unary():
    # Element-wise unary ops on integer and float matrix fields, checked
    # against the corresponding numpy functions.
    xi = ti.Matrix.field(3, 2, ti.i32, 4)
    yi = ti.Matrix.field(3, 2, ti.i32, ())
    xf = ti.Matrix.field(3, 2, ti.f32, 15)
    yf = ti.Matrix.field(3, 2, ti.f32, ())

    yi.from_numpy(np.array([[3, 2], [9, 0], [7, 4]], np.int32))
    yf.from_numpy(np.array([[0.3, 0.2], [0.9, 0.1], [0.7, 0.4]], np.float32))

    @ti.kernel
    def func():
        xi[0] = -yi[None]
        xi[1] = ~yi[None]
        xi[2] = not yi[None]
        xi[3] = abs(yi[None])
        xf[0] = -yf[None]
        xf[1] = abs(yf[None])
        xf[2] = ti.sqrt(yf[None])
        xf[3] = ti.sin(yf[None])
        xf[4] = ti.cos(yf[None])
        xf[5] = ti.tan(yf[None])
        xf[6] = ti.asin(yf[None])
        xf[7] = ti.acos(yf[None])
        xf[8] = ti.tanh(yf[None])
        xf[9] = ti.floor(yf[None])
        xf[10] = ti.ceil(yf[None])
        xf[11] = ti.exp(yf[None])
        xf[12] = ti.log(yf[None])
        xf[13] = ti.rsqrt(yf[None])
        xf[14] = ti.round(yf[None])

    func()
    # NOTE(review): xi[2] (`not` on a matrix) is computed but deliberately
    # not asserted below — presumably its semantics are backend-dependent;
    # confirm before adding an assertion.
    xi = xi.to_numpy()
    yi = yi.to_numpy()
    xf = xf.to_numpy()
    yf = yf.to_numpy()
    assert test_utils.allclose(xi[0], -yi)
    assert test_utils.allclose(xi[1], ~yi)
    assert test_utils.allclose(xi[3], np.abs(yi))
    assert test_utils.allclose(xf[0], -yf)
    assert test_utils.allclose(xf[1], np.abs(yf))
    assert test_utils.allclose(xf[2], np.sqrt(yf), rel=1e-5)
    assert test_utils.allclose(xf[3], np.sin(yf), rel=1e-4)
    assert test_utils.allclose(xf[4], np.cos(yf), rel=1e-4)
    assert test_utils.allclose(xf[5], np.tan(yf), rel=1e-4)
    assert test_utils.allclose(xf[6], np.arcsin(yf), rel=1e-4)
    assert test_utils.allclose(xf[7], np.arccos(yf), rel=1e-4)
    assert test_utils.allclose(xf[8], np.tanh(yf), rel=1e-4)
    assert test_utils.allclose(xf[9], np.floor(yf), rel=1e-5)
    assert test_utils.allclose(xf[10], np.ceil(yf), rel=1e-5)
    assert test_utils.allclose(xf[11], np.exp(yf), rel=1e-5)
    assert test_utils.allclose(xf[12], np.log(yf), rel=1e-5)
    assert test_utils.allclose(xf[13], 1 / np.sqrt(yf), rel=1e-5)
    assert test_utils.allclose(xf[14], np.round(yf), rel=1e-5)
+
+
@pytest.mark.parametrize('is_mat', [(True, True, True), (True, False, False),
                                    (False, True, False), (False, False, True),
                                    (False, True, True)])
@test_utils.test()
def test_ternary_i(is_mat):
    # Ternary select (a if cond else b) with every mix of matrix/scalar
    # operands for cond, lhs and rhs.
    cond_is_mat, lhs_is_mat, rhs_is_mat = is_mat
    x = ti.Matrix.field(3, 2, ti.i32, 1)
    if cond_is_mat:
        y = ti.Matrix.field(3, 2, ti.i32, ())
    else:
        y = ti.field(ti.i32, ())
    if lhs_is_mat:
        z = ti.Matrix.field(3, 2, ti.i32, ())
    else:
        z = ti.field(ti.i32, ())
    if rhs_is_mat:
        w = ti.Matrix.field(3, 2, ti.i32, ())
    else:
        w = ti.field(ti.i32, ())

    # Condition values mix zeros and non-zeros to hit both branches.
    if cond_is_mat:
        y.from_numpy(np.array([[0, 2], [9, 0], [7, 4]], np.int32))
    else:
        y[None] = 0
    if lhs_is_mat:
        z.from_numpy(np.array([[4, 5], [6, 3], [9, 2]], np.int32))
    else:
        z[None] = 5
    if rhs_is_mat:
        w.from_numpy(np.array([[4, 5], [6, 3], [9, 2]], np.int32))
    else:
        w[None] = 4

    @ti.kernel
    def func():
        x[0] = z[None] if y[None] else w[None]

    func()
    x = x.to_numpy()
    y = y.to_numpy()
    z = z.to_numpy()
    w = w.to_numpy()
    # Expected: element-wise select — z where cond is non-zero, else w.
    assert test_utils.allclose(
        x[0],
        np.int32(np.bool_(y)) * z + np.int32(1 - np.bool_(y)) * w)
diff --git a/tests/_python_orig/test_empty.py b/tests/_python_orig/test_empty.py
new file mode 100644
index 000000000..80ac246bb
--- /dev/null
+++ b/tests/_python_orig/test_empty.py
@@ -0,0 +1,21 @@
+import taichi as ti
+from tests import test_utils
+
+
@test_utils.test()
def test_empty():
    # A kernel with an empty body should compile and run without error.
    @ti.kernel
    def func():
        pass

    func()


@test_utils.test()
def test_empty_args():
    # An empty kernel taking a scalar and an external array should still
    # compile and accept the arguments.
    @ti.kernel
    def func(x: ti.i32, arr: ti.ext_arr()):
        pass

    import numpy as np
    func(42, np.arange(10, dtype=np.float32))
diff --git a/tests/_python_orig/test_exception.py b/tests/_python_orig/test_exception.py
new file mode 100644
index 000000000..2cace7487
--- /dev/null
+++ b/tests/_python_orig/test_exception.py
@@ -0,0 +1,158 @@
+from inspect import currentframe, getframeinfo
+from sys import version_info
+
+import pytest
+from tests import test_utils
+
+import taichi as ti
+
+
@test_utils.test()
def test_exception_multiline():
    # Error excerpts for a multi-line call should point at the first line.
    # NOTE: the assertions depend on the exact line offsets between the
    # getframeinfo() call and the kernel body — do not insert lines between.
    frameinfo = getframeinfo(currentframe())
    with pytest.raises(ti.TaichiNameError) as e:
        # yapf: disable
        @ti.kernel
        def foo():
            aaaa(111,
                 1211222,

                 23)
        foo()
    # yapf: enable

    # Python >= 3.8 adds a caret (^) marker line to the excerpt.
    if version_info < (3, 8):
        msg = f"""
On line {frameinfo.lineno + 5} of file "{frameinfo.filename}", in foo:
    aaaa(111,"""
    else:
        msg = f"""
On line {frameinfo.lineno + 5} of file "{frameinfo.filename}", in foo:
    aaaa(111,
    ^^^^"""
    print(e.value.args[0])
    assert e.value.args[0][:len(msg)] == msg
+
+
@test_utils.test()
def test_exception_from_func():
    # An error raised deep inside nested ti.func calls should produce a
    # full foo -> bar -> baz traceback in the message.
    # NOTE: the assertions depend on the exact line offsets below — do not
    # insert lines between the getframeinfo() call and foo().
    frameinfo = getframeinfo(currentframe())
    with pytest.raises(ti.TaichiNameError) as e:

        @ti.func
        def baz():
            t()

        @ti.func
        def bar():
            baz()

        @ti.kernel
        def foo():
            bar()

        foo()
    lineno = frameinfo.lineno
    file = frameinfo.filename
    # Python >= 3.8 adds caret (^) marker lines to each frame's excerpt.
    if version_info < (3, 8):
        msg = f"""
On line {lineno + 13} of file "{file}", in foo:
    bar()
On line {lineno + 9} of file "{file}", in bar:
    baz()
On line {lineno + 5} of file "{file}", in baz:
    t()"""
    else:
        msg = f"""
On line {lineno + 13} of file "{file}", in foo:
    bar()
    ^^^^^
On line {lineno + 9} of file "{file}", in bar:
    baz()
    ^^^^^
On line {lineno + 5} of file "{file}", in baz:
    t()
    ^"""
    print(e.value.args[0])
    assert e.value.args[0][:len(msg)] == msg
+
+
# Verifies that whitespace (tabs) in the offending source line is handled
# when rendering the error excerpt. Line-offset sensitive — do not insert
# lines between getframeinfo() and foo().
@test_utils.test()
def test_tab():
    frameinfo = getframeinfo(currentframe())
    with pytest.raises(ti.TaichiNameError) as e:
        # yapf: disable
        @ti.kernel
        def foo():
            a(11,  22,   3)
        foo()
    # yapf: enable
    lineno = frameinfo.lineno
    file = frameinfo.filename
    if version_info < (3, 8):
        msg = f"""
On line {lineno + 5} of file "{file}", in foo:
    a(11, 22, 3)"""
    else:
        msg = f"""
On line {lineno + 5} of file "{file}", in foo:
    a(11, 22, 3)
    ^"""
    print(e.value.args[0])
    assert e.value.args[0][:len(msg)] == msg
+
+
# Verifies that an over-long offending line is wrapped in the error excerpt,
# with caret markers under every wrapped segment. Line-offset sensitive —
# do not insert lines between getframeinfo() and foo().
@test_utils.test()
def test_super_long_line():
    frameinfo = getframeinfo(currentframe())
    with pytest.raises(ti.TaichiNameError) as e:
        # yapf: disable
        @ti.kernel
        def foo():
            aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(111)
        foo()
    # yapf: enable
    lineno = frameinfo.lineno
    file = frameinfo.filename
    if version_info < (3, 8):
        msg = f"""
On line {lineno + 5} of file "{file}", in foo:
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(111)
"""
    else:
        msg = f"""
On line {lineno + 5} of file "{file}", in foo:
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbaaaaaa
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
bbbbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(111)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"""
    print(e.value.args[0])
    assert e.value.args[0][:len(msg)] == msg
+
+
@pytest.mark.skipif(version_info < (3, 8), reason="This is a feature for python>=3.8")
@test_utils.test()
def test_exception_in_node_with_body():
    # A compile error on a statement that owns a body (the `for`) should
    # caret-mark the whole header line. Line-offset sensitive — do not
    # insert lines between getframeinfo() and the kernel.
    frameinfo = getframeinfo(currentframe())
    @ti.kernel
    def foo():
        for i in range(1, 2, 3):
            a = 1
            b = 1
            c = 1
            d = 1

    with pytest.raises(ti.TaichiCompilationError) as e:
        foo()
    lineno = frameinfo.lineno
    file = frameinfo.filename
    msg = f"""
On line {lineno + 3} of file "{file}", in foo:
    for i in range(1, 2, 3):
    ^^^^^^^^^^^^^^^^^^^^^^^^
Range should have 1 or 2 arguments, found 3"""
    print(e.value.args[0])
    assert e.value.args[0] == msg
+
diff --git a/tests/_python_orig/test_expr_dict.py b/tests/_python_orig/test_expr_dict.py
new file mode 100644
index 000000000..2cb32de83
--- /dev/null
+++ b/tests/_python_orig/test_expr_dict.py
@@ -0,0 +1,44 @@
+import taichi as ti
+from tests import test_utils
+
+
@test_utils.test(ti.cpu)
def test_expr_dict_basic():
    # Dict literals built from kernel arguments should work inside kernels.
    @ti.kernel
    def func(u: int, v: float) -> float:
        x = {'foo': 2 + u, 'bar': 3 + v}
        return x['foo'] * 100 + x['bar']

    # foo = 4, bar = 3.1 -> 403.1
    assert func(2, 0.1) == test_utils.approx(403.1)


@test_utils.test(ti.cpu)
def test_expr_dict_field():
    # Dict literals built from global field reads should work inside kernels.
    a = ti.field(ti.f32, shape=(4, ))

    @ti.kernel
    def func() -> float:
        x = {'foo': 2 + a[0], 'bar': 3 + a[1]}
        return x['foo'] * 100 + x['bar']

    a[0] = 2
    a[1] = 0.1
    assert func() == test_utils.approx(403.1)


@test_utils.test(ti.cpu)
def test_dictcomp_multiple_ifs():
    # Dict comprehensions with multiple `if` clauses should be supported.
    n = 8
    x = ti.field(ti.i32, shape=(n, ))

    @ti.kernel
    def test() -> ti.i32:
        # Taichi doesn't support global fields appearing anywhere after "for"
        # here.
        a = {x[j]: x[j] + j for j in range(100) if j > 2 if j < 5}
        return sum(a.values())

    for i in range(n):
        x[i] = i * 2

    # Only j = 3 and j = 4 survive the filters.
    assert test() == (3 * 2 + 3) + (4 * 2 + 4)
diff --git a/tests/_python_orig/test_expr_list.py b/tests/_python_orig/test_expr_list.py
new file mode 100644
index 000000000..5f9ea669c
--- /dev/null
+++ b/tests/_python_orig/test_expr_list.py
@@ -0,0 +1,28 @@
+import taichi as ti
+from tests import test_utils
+
+
@test_utils.test(ti.cpu)
def test_expr_list_basic():
    # List literals built from kernel arguments should work inside kernels.
    @ti.kernel
    def func(u: int, v: float) -> float:
        x = [2 + u, 3 + v]
        return x[0] * 100 + x[1]

    # x = [3, 4.1] -> 304.1
    assert func(1, 1.1) == test_utils.approx(304.1)


@test_utils.test()
def test_listcomp_multiple_ifs():
    # List comprehensions with multiple `if` clauses should be supported.
    x = ti.field(ti.i32, shape=(4, ))

    @ti.kernel
    def test() -> ti.i32:
        # Taichi doesn't support global fields appearing anywhere after "for"
        # here.
        a = [x[0] for j in range(100) if j > 2 if j < 5]
        return sum(a)

    # Only j = 3 and j = 4 pass the filters, so the sum is x[0] * 2.
    for i in range(6):
        x[0] = i
        assert test() == i * 2
diff --git a/tests/_python_orig/test_external_func.py b/tests/_python_orig/test_external_func.py
new file mode 100644
index 000000000..b7c4bb22f
--- /dev/null
+++ b/tests/_python_orig/test_external_func.py
@@ -0,0 +1,89 @@
+import ctypes
+import os
+import shutil
+import tempfile
+
+import pytest
+from taichi.lang.util import has_clangpp
+
+import taichi as ti
+from tests import test_utils
+
+
@pytest.mark.skipif(not has_clangpp(), reason='Clang not installed.')
@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_source_builder_from_source():
    # Compile an inline C++ snippet with SourceBuilder and call its
    # extern "C" functions from inside a kernel.
    source_bc = '''
    extern "C" {
        void add_and_mul(float *a, float *b, float *c, float *d, int *e) {
            *c = (*a) + (*b);
            *d = (*a) * (*b);
            *e = int((*a) * (*b) + (*a));
        }
        void pow_int(int *a, int *b, int *c) {
            int ret = 1;
            for (int i = 0; i < (*b); i++)
                ret = ret * (*a);
            *c = ret;
        }
    }
    '''
    sb_bc = ti.lang.source_builder.SourceBuilder.from_source(source_bc)

    @ti.kernel
    def func_bc() -> ti.i32:
        a = 2.0
        b = 3.0
        c = 0.0
        d = 0.0
        e = 3
        sb_bc.add_and_mul(a, b, c, d, e)
        p = 0
        c_plus_d = int(c + d)
        sb_bc.pow_int(c_plus_d, e, p)
        return p

    # c = 5, d = 6, e = 8 -> pow_int(11, 8) = 11**8
    assert func_bc() == 11**8
+
+
@pytest.mark.skipif(not has_clangpp(), reason='Clang not installed.')
@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_source_builder_from_file():
    # Same as test_source_builder_from_source, but the C++ snippet is
    # loaded from a file on disk via SourceBuilder.from_file.
    source_code = '''
    extern "C" {
        void add_and_mul(float *a, float *b, float *c, float *d, int *e) {
            *c = (*a) + (*b);
            *d = (*a) * (*b);
            *e = int((*a) * (*b) + (*a));
        }
        void pow_int(int *a, int *b, int *c) {
            int ret = 1;
            for (int i = 0; i < (*b); i++)
                ret = ret * (*a);
            *c = ret;
        }
    }
    '''

    td = tempfile.mkdtemp()
    # Fix: clean the temp dir up even when compilation or the kernel fails;
    # previously rmtree only ran on success, leaking the directory.
    try:
        fn = os.path.join(td, 'source.cpp')
        with open(fn, 'w') as f:
            f.write(source_code)
        sb_bc = ti.lang.source_builder.SourceBuilder.from_file(fn)

        @ti.kernel
        def func_bc() -> ti.i32:
            a = 2.0
            b = 3.0
            c = 0.0
            d = 0.0
            e = 3
            sb_bc.add_and_mul(a, b, c, d, e)
            p = 0
            c_plus_d = int(c + d)
            sb_bc.pow_int(c_plus_d, e, p)
            return p

        # c = 5, d = 6, e = 8 -> pow_int(11, 8) = 11**8
        assert func_bc() == 11**8
    finally:
        shutil.rmtree(td)
diff --git a/tests/_python_orig/test_f16.py b/tests/_python_orig/test_f16.py
new file mode 100644
index 000000000..00709f0a9
--- /dev/null
+++ b/tests/_python_orig/test_f16.py
@@ -0,0 +1,303 @@
+import math
+
+import numpy as np
+import pytest
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+archs_support_f16 = [ti.cpu, ti.cuda, ti.vulkan]
+
+
@test_utils.test(arch=archs_support_f16)
def test_snode_read_write():
    # Round-trip a value through an f16 field via Python-side read/write.
    dtype = ti.f16
    x = ti.field(dtype, shape=())
    x[None] = 0.3
    print(x[None])
    # Half precision only gives ~3 decimal digits.
    assert (x[None] == test_utils.approx(0.3, rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_float16():
    # ti.float16 alias behaves the same as ti.f16.
    dtype = ti.float16
    x = ti.field(dtype, shape=())
    x[None] = 0.3
    print(x[None])
    assert (x[None] == test_utils.approx(0.3, rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_to_numpy():
    # f16 field -> numpy export preserves exactly-representable values.
    n = 16
    x = ti.field(ti.f16, shape=n)

    @ti.kernel
    def init():
        for i in x:
            x[i] = i * 2

    init()
    y = x.to_numpy()
    for i in range(n):
        assert (y[i] == 2 * i)


@test_utils.test(arch=archs_support_f16)
def test_from_numpy():
    # numpy half array -> f16 field import, then overwrite in a kernel.
    n = 16
    y = ti.field(dtype=ti.f16, shape=n)
    x = np.arange(n, dtype=np.half)
    y.from_numpy(x)

    @ti.kernel
    def init():
        for i in y:
            y[i] = 3 * i

    init()
    z = y.to_numpy()
    for i in range(n):
        assert (z[i] == i * 3)


@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
@test_utils.test(arch=archs_support_f16)
def test_to_torch():
    # f16 field -> torch tensor export preserves exactly-representable values.
    n = 16
    x = ti.field(ti.f16, shape=n)

    @ti.kernel
    def init():
        for i in x:
            x[i] = i * 2

    init()
    y = x.to_torch()
    print(y)
    for i in range(n):
        assert (y[i] == 2 * i)
+
+
@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
@test_utils.test(arch=archs_support_f16)
def test_from_torch():
    # torch half tensor -> f16 field import, then overwrite in a kernel.
    import torch
    n = 16
    y = ti.field(dtype=ti.f16, shape=n)
    # torch doesn't have rand implementation for float16 so we need to create float first and then convert.
    # Fix: torch.range is deprecated (and end-inclusive); torch.arange(0, n)
    # yields the identical 0..n-1 sequence.
    x = torch.arange(0, n).to(torch.float16)
    y.from_torch(x)

    @ti.kernel
    def init():
        for i in y:
            y[i] = 3 * i

    init()
    z = y.to_torch()
    for i in range(n):
        assert (z[i] == i * 3)
+
+
@test_utils.test(arch=archs_support_f16)
def test_binary_op():
    # f16 add/mul inside a kernel: x = (y + z) * z.
    dtype = ti.f16
    x = ti.field(dtype, shape=())
    y = ti.field(dtype, shape=())
    z = ti.field(dtype, shape=())

    @ti.kernel
    def add():
        x[None] = y[None] + z[None]
        x[None] = x[None] * z[None]

    y[None] = 0.2
    z[None] = 0.72
    add()
    u = x.to_numpy()
    # u is a 0-d array; u[None] indexes it with np.newaxis — presumably
    # relied on for the approx comparison (TODO confirm).
    assert (u[None] == test_utils.approx(0.6624, rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_rand_promote():
    # ti.random with an f16 dtype should be generated/stored without error.
    dtype = ti.f16
    x = ti.field(dtype, shape=(4, 4))

    @ti.kernel
    def init():
        for i, j in x:
            x[i, j] = ti.random(dtype=dtype)
            print(x[i, j])

    init()


@test_utils.test(arch=archs_support_f16)
def test_unary_op():
    # f16 negation, floor and ceil.
    dtype = ti.f16
    x = ti.field(dtype, shape=())
    y = ti.field(dtype, shape=())

    @ti.kernel
    def foo():
        x[None] = -y[None]
        x[None] = ti.floor(x[None])
        y[None] = ti.ceil(y[None])

    y[None] = -1.4
    foo()
    # floor(1.4) == 1, ceil(-1.4) == -1.
    assert (x[None] == test_utils.approx(1, rel=1e-3))
    assert (y[None] == test_utils.approx(-1, rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_extra_unary_promote():
    # abs() on f16 values.
    dtype = ti.f16
    x = ti.field(dtype, shape=())
    y = ti.field(dtype, shape=())

    @ti.kernel
    def foo():
        x[None] = abs(y[None])

    y[None] = -0.3
    foo()
    assert (x[None] == test_utils.approx(0.3, rel=1e-3))


@test_utils.test(arch=archs_support_f16, exclude=ti.vulkan)
def test_binary_extra_promote():
    # pow and atan2 with f16 operands (atan2 mixes an f16 with a literal).
    x = ti.field(dtype=ti.f16, shape=())
    y = ti.field(dtype=ti.f16, shape=())
    z = ti.field(dtype=ti.f16, shape=())

    @ti.kernel
    def foo():
        y[None] = x[None]**2
        z[None] = ti.atan2(y[None], 0.3)

    x[None] = 0.1
    foo()
    assert (z[None] == test_utils.approx(math.atan2(0.1**2, 0.3), rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_arg_f16():
    # f16 kernel arguments should be accepted and promoted correctly.
    dtype = ti.f16
    x = ti.field(dtype, shape=())
    y = ti.field(dtype, shape=())

    @ti.kernel
    def foo(a: ti.f16):
        x[None] = y[None] + a

    y[None] = -0.3
    foo(1.2)
    assert (x[None] == test_utils.approx(0.9, rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_fractal_f16():
    # Smoke test: run the Julia-set fractal demo entirely in f16.
    n = 320
    pixels = ti.field(dtype=ti.f16, shape=(n * 2, n))

    @ti.func
    def complex_sqr(z):
        return ti.Vector([z[0]**2 - z[1]**2, z[1] * z[0] * 2], dt=ti.f16)

    @ti.kernel
    def paint(t: float):
        for i, j in pixels:  # Parallelized over all pixels
            c = ti.Vector([-0.8, ti.cos(t) * 0.2], dt=ti.f16)
            z = ti.Vector([i / n - 1, j / n - 0.5], dt=ti.f16) * 2
            iterations = 0
            while z.norm() < 20 and iterations < 50:
                z = complex_sqr(z) + c
                iterations += 1
            pixels[i, j] = 1 - iterations * 0.02

    paint(0.03)


# TODO(): Vulkan support
@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_atomic_add_f16():
    # Parallel f16 atomic add must agree with the serial sum.
    f = ti.field(dtype=ti.f16, shape=(2))

    @ti.kernel
    def foo():
        # Parallel sum
        for i in range(1000):
            f[0] += 1.12

        # Serial sum
        for _ in range(1):
            for i in range(1000):
                f[1] = f[1] + 1.12

    foo()
    assert (f[0] == test_utils.approx(f[1], rel=1e-3))


# TODO(): Vulkan support
@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_atomic_max_f16():
    # Parallel f16 atomic max must agree with the serial max.
    f = ti.field(dtype=ti.f16, shape=(2))

    @ti.kernel
    def foo():
        # Parallel max
        for i in range(1000):
            ti.atomic_max(f[0], 1.12 * i)

        # Serial max
        for _ in range(1):
            for i in range(1000):
                f[1] = ti.max(1.12 * i, f[1])

    foo()
    assert (f[0] == test_utils.approx(f[1], rel=1e-3))


# TODO(): Vulkan support
@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_atomic_min_f16():
    # Parallel f16 atomic min must agree with the serial min.
    f = ti.field(dtype=ti.f16, shape=(2))

    @ti.kernel
    def foo():
        # Parallel min
        for i in range(1000):
            ti.atomic_min(f[0], -3.13 * i)

        # Serial min
        for _ in range(1):
            for i in range(1000):
                f[1] = ti.min(-3.13 * i, f[1])

    foo()
    assert (f[0] == test_utils.approx(f[1], rel=1e-3))


@test_utils.test(arch=archs_support_f16)
def test_cast_f32_to_f16():
    # Narrowing cast f32 -> f16 of an exactly-representable product.
    @ti.kernel
    def func() -> ti.f16:
        a = ti.cast(23.0, ti.f32)
        b = ti.cast(4.0, ti.f32)
        return ti.cast(a * b, ti.f16)

    assert func() == pytest.approx(23.0 * 4.0, 1e-4)


@test_utils.test(arch=archs_support_f16, require=ti.extension.data64)
def test_cast_f64_to_f16():
    # Narrowing cast f64 -> f16 of an exactly-representable product.
    @ti.kernel
    def func() -> ti.f16:
        a = ti.cast(23.0, ti.f64)
        b = ti.cast(4.0, ti.f64)
        return ti.cast(a * b, ti.f16)

    assert func() == pytest.approx(23.0 * 4.0, 1e-4)
diff --git a/tests/_python_orig/test_field.py b/tests/_python_orig/test_field.py
new file mode 100644
index 000000000..7f1bcd1fb
--- /dev/null
+++ b/tests/_python_orig/test_field.py
@@ -0,0 +1,183 @@
+'''
+To test our new `ti.field` API is functional (#1500)
+'''
+
+import pytest
+from taichi.lang import impl
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+data_types = [ti.i32, ti.f32, ti.i64, ti.f64]
+field_shapes = [(), 8, (6, 12)]
+vector_dims = [3]
+matrix_dims = [(1, 2), (2, 3)]
+
+
@pytest.mark.parametrize('dtype', data_types)
@pytest.mark.parametrize('shape', field_shapes)
@test_utils.test(arch=get_host_arch_list())
def test_scalar_field(dtype, shape):
    # Scalar fields report their shape (int shapes normalize to 1-tuples)
    # and dtype.
    x = ti.field(dtype, shape)

    if isinstance(shape, tuple):
        assert x.shape == shape
    else:
        assert x.shape == (shape, )

    assert x.dtype == dtype


@pytest.mark.parametrize('n', vector_dims)
@pytest.mark.parametrize('dtype', data_types)
@pytest.mark.parametrize('shape', field_shapes)
@test_utils.test(arch=get_host_arch_list())
def test_vector_field(n, dtype, shape):
    # Vector fields expose n rows and a single column (m == 1).
    x = ti.Vector.field(n, dtype, shape)

    if isinstance(shape, tuple):
        assert x.shape == shape
    else:
        assert x.shape == (shape, )

    assert x.dtype == dtype
    assert x.n == n
    assert x.m == 1


@pytest.mark.parametrize('n,m', matrix_dims)
@pytest.mark.parametrize('dtype', data_types)
@pytest.mark.parametrize('shape', field_shapes)
@test_utils.test(arch=get_host_arch_list())
def test_matrix_field(n, m, dtype, shape):
    # Matrix fields expose their n x m element dimensions.
    x = ti.Matrix.field(n, m, dtype=dtype, shape=shape)

    if isinstance(shape, tuple):
        assert x.shape == shape
    else:
        assert x.shape == (shape, )

    assert x.dtype == dtype
    assert x.n == n
    assert x.m == m


# NOTE(review): 'scalr' is a typo for 'scalar' in the next two test names;
# kept as-is since renaming tests is a separate change.
@pytest.mark.parametrize('dtype', data_types)
@pytest.mark.parametrize('shape', field_shapes)
@test_utils.test(arch=get_host_arch_list())
def test_scalr_field_from_numpy(dtype, shape):
    # from_numpy succeeds when the array's dtype and shape match the field.
    import numpy as np
    x = ti.field(dtype, shape)
    # use the corresponding dtype for the numpy array.
    numpy_dtypes = {
        ti.i32: np.int32,
        ti.f32: np.float32,
        ti.f64: np.float64,
        ti.i64: np.int64,
    }
    arr = np.empty(shape, dtype=numpy_dtypes[dtype])
    x.from_numpy(arr)


@pytest.mark.parametrize('dtype', data_types)
@pytest.mark.parametrize('shape', field_shapes)
@test_utils.test(arch=get_host_arch_list())
def test_scalr_field_from_numpy_with_mismatch_shape(dtype, shape):
    # from_numpy must reject an array whose shape differs from the field's.
    import numpy as np
    x = ti.field(dtype, shape)
    numpy_dtypes = {
        ti.i32: np.int32,
        ti.f32: np.float32,
        ti.f64: np.float64,
        ti.i64: np.int64,
    }
    # compose the mismatch shape for every ti.field.
    # set the shape to (2, 3) by default, if the ti.field shape is a tuple, set it to 1.
    mismatch_shape = (2, 3)
    if isinstance(shape, tuple):
        mismatch_shape = 1
    arr = np.empty(mismatch_shape, dtype=numpy_dtypes[dtype])
    with pytest.raises(ValueError):
        x.from_numpy(arr)


@test_utils.test(arch=get_host_arch_list())
def test_field_needs_grad():
    # Just make sure the usage doesn't crash, see #1545
    n = 8
    m1 = ti.field(dtype=ti.f32, shape=n, needs_grad=True)
    m2 = ti.field(dtype=ti.f32, shape=n, needs_grad=True)
    gr = ti.field(dtype=ti.f32, shape=n)

    @ti.kernel
    def func():
        for i in range(n):
            gr[i] = m1.grad[i] + m2.grad[i]

    func()


@pytest.mark.parametrize('dtype', [ti.f32, ti.f64])
def test_default_fp(dtype):
    # `float` in field declarations resolves to the configured default_fp.
    # Calls ti.init itself, so no @test_utils.test decorator here.
    ti.init(default_fp=dtype)

    x = ti.Vector.field(2, float, ())

    assert x.dtype == impl.get_runtime().default_fp


@pytest.mark.parametrize('dtype', [ti.i32, ti.i64])
def test_default_ip(dtype):
    # `int` in field declarations resolves to the configured default_ip.
    ti.init(default_ip=dtype)

    x = ti.Vector.field(2, int, ())

    assert x.dtype == impl.get_runtime().default_ip


@test_utils.test()
def test_field_name():
    # The name= argument is surfaced via _name on fields and their snodes.
    a = ti.field(dtype=ti.f32, shape=(2, 3), name='a')
    b = ti.Vector.field(3, dtype=ti.f32, shape=(2, 3), name='b')
    c = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(5, 4), name='c')
    assert a._name == 'a'
    assert b._name == 'b'
    assert c._name == 'c'
    assert b.snode._name == 'b'
    d = []
    for i in range(10):
        d.append(ti.field(dtype=ti.f32, shape=(2, 3), name=f'd{i}'))
        assert d[i]._name == f'd{i}'


@test_utils.test()
@pytest.mark.parametrize('shape', field_shapes)
@pytest.mark.parametrize('dtype', [ti.i32, ti.f32])
def test_field_copy_from(shape, dtype):
    # copy_from transfers values (with dtype conversion) between fields of
    # the same shape; the destination's shape and dtype are unchanged.
    x = ti.field(dtype=ti.f32, shape=shape)
    other = ti.field(dtype=dtype, shape=shape)
    other.fill(1)
    x.copy_from(other)
    # Normalize 1-tuples back to the scalar form used in the parametrize.
    convert = lambda arr: arr[0] if len(arr) == 1 else arr
    assert (convert(x.shape) == shape)
    assert (x.dtype == ti.f32)
    assert ((x.to_numpy() == 1).all())


@test_utils.test()
def test_field_copy_from_with_mismatch_shape():
    # copy_from must reject fields of a different shape or rank.
    x = ti.field(dtype=ti.f32, shape=(2, 3))
    for other_shape in [(2, ), (2, 2), (2, 3, 4)]:
        other = ti.field(dtype=ti.f16, shape=other_shape)
        with pytest.raises(ValueError):
            x.copy_from(other)


@test_utils.test()
def test_field_copy_from_with_non_filed_object():
    # copy_from only accepts Taichi fields, not raw numpy arrays.
    import numpy as np
    x = ti.field(dtype=ti.f32, shape=(2, 3))
    other = np.zeros((2, 3))
    with pytest.raises(TypeError):
        x.copy_from(other)
diff --git a/tests/_python_orig/test_fields_builder.py b/tests/_python_orig/test_fields_builder.py
new file mode 100644
index 000000000..80e7db5a6
--- /dev/null
+++ b/tests/_python_orig/test_fields_builder.py
@@ -0,0 +1,180 @@
+import pytest
+from taichi.lang.exception import TaichiRuntimeError
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.metal])
+def test_fields_with_shape():
+ shape = 5
+ x = ti.field(ti.f32, shape=shape)
+
+ @ti.kernel
+ def assign_field_single():
+ for i in range(shape):
+ x[i] = i
+
+ assign_field_single()
+ for i in range(shape):
+ assert x[i] == i
+
+ y = ti.field(ti.f32, shape=shape)
+
+ @ti.kernel
+ def assign_field_multiple():
+ for i in range(shape):
+ y[i] = i * 2
+ for i in range(shape):
+ x[i] = i * 3
+
+ assign_field_multiple()
+ for i in range(shape):
+ assert x[i] == i * 3
+ assert y[i] == i * 2
+
+ assign_field_single()
+ for i in range(shape):
+ assert x[i] == i
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.metal])
+def test_fields_builder_dense():
+ shape = 5
+ fb1 = ti.FieldsBuilder()
+ x = ti.field(ti.f32)
+ fb1.dense(ti.i, shape).place(x)
+ fb1.finalize()
+
+ @ti.kernel
+ def assign_field_single():
+ for i in range(shape):
+ x[i] = i * 3
+
+ assign_field_single()
+ for i in range(shape):
+ assert x[i] == i * 3
+
+ fb2 = ti.FieldsBuilder()
+ y = ti.field(ti.f32)
+ fb2.dense(ti.i, shape).place(y)
+ z = ti.field(ti.f32)
+ fb2.dense(ti.i, shape).place(z)
+ fb2.finalize()
+
+ @ti.kernel
+ def assign_field_multiple():
+ for i in range(shape):
+ x[i] = i * 2
+ for i in range(shape):
+ y[i] = i + 5
+ for i in range(shape):
+ z[i] = i + 10
+
+ assign_field_multiple()
+ for i in range(shape):
+ assert x[i] == i * 2
+ assert y[i] == i + 5
+ assert z[i] == i + 10
+
+ assign_field_single()
+ for i in range(shape):
+ assert x[i] == i * 3
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.metal])
+def test_fields_builder_pointer():
+ shape = 5
+ fb1 = ti.FieldsBuilder()
+ x = ti.field(ti.f32)
+ fb1.pointer(ti.i, shape).place(x)
+ fb1.finalize()
+
+ @ti.kernel
+ def assign_field_single():
+ for i in range(shape):
+ x[i] = i * 3
+
+ assign_field_single()
+ for i in range(shape):
+ assert x[i] == i * 3
+
+ fb2 = ti.FieldsBuilder()
+ y = ti.field(ti.f32)
+ fb2.pointer(ti.i, shape).place(y)
+ z = ti.field(ti.f32)
+ fb2.pointer(ti.i, shape).place(z)
+ fb2.finalize()
+
+ @ti.kernel
+ def assign_field_multiple_range_for():
+ for i in range(shape):
+ x[i] = i * 2
+ for i in range(shape):
+ y[i] = i + 5
+ for i in range(shape):
+ z[i] = i + 10
+
+ assign_field_multiple_range_for()
+ for i in range(shape):
+ assert x[i] == i * 2
+ assert y[i] == i + 5
+ assert z[i] == i + 10
+
+ @ti.kernel
+ def assign_field_multiple_struct_for():
+ for i in y:
+ y[i] += 5
+ for i in z:
+ z[i] -= 5
+
+ assign_field_multiple_struct_for()
+ for i in range(shape):
+ assert y[i] == i + 10
+ assert z[i] == i + 5
+
+ assign_field_single()
+ for i in range(shape):
+ assert x[i] == i * 3
+
+
+# We currently only consider data types that all platforms support.
+# See https://docs.taichi.graphics/lang/articles/basic/type#supported-primitive-types for more details.
+@pytest.mark.parametrize('test_1d_size', [1, 10, 100])
+@pytest.mark.parametrize('field_type', [ti.f32, ti.i32])
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan, ti.metal])
+def test_fields_builder_destroy(test_1d_size, field_type):
+ def test_for_single_destroy_multi_fields():
+ fb = ti.FieldsBuilder()
+ for create_field_idx in range(10):
+ field = ti.field(field_type)
+ fb.dense(ti.i, test_1d_size).place(field)
+ fb_snode_tree = fb.finalize()
+ fb_snode_tree.destroy()
+
+ def test_for_multi_destroy_multi_fields():
+ fb0 = ti.FieldsBuilder()
+ fb1 = ti.FieldsBuilder()
+
+ for create_field_idx in range(10):
+ field0 = ti.field(field_type)
+ field1 = ti.field(field_type)
+
+ fb0.dense(ti.i, test_1d_size).place(field0)
+ fb1.pointer(ti.i, test_1d_size).place(field1)
+
+ fb0_snode_tree = fb0.finalize()
+ fb1_snode_tree = fb1.finalize()
+
+ fb0_snode_tree.destroy()
+ fb1_snode_tree.destroy()
+
+ def test_for_raise_destroy_twice():
+ fb = ti.FieldsBuilder()
+ a = ti.field(ti.f32)
+ fb.dense(ti.i, test_1d_size).place(a)
+ c = fb.finalize()
+
+ with pytest.raises(TaichiRuntimeError):
+ c.destroy()
+ c.destroy()
diff --git a/tests/_python_orig/test_fill.py b/tests/_python_orig/test_fill.py
new file mode 100644
index 000000000..5692fe388
--- /dev/null
+++ b/tests/_python_orig/test_fill.py
@@ -0,0 +1,71 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_fill_scalar():
+ val = ti.field(ti.i32)
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ for i in range(n):
+ for j in range(m):
+ val[i, j] = i + j * 3
+
+ val.fill(2)
+
+ for i in range(n):
+ for j in range(m):
+ assert val[i, j] == 2
+
+
+@test_utils.test()
+def test_fill_matrix_scalar():
+ val = ti.Matrix.field(2, 3, ti.i32)
+
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ for i in range(n):
+ for j in range(m):
+ for p in range(2):
+ for q in range(3):
+ val[i, j][p, q] = i + j * 3
+
+ val.fill(2)
+
+ for i in range(n):
+ for j in range(m):
+ for p in range(2):
+ for q in range(3):
+ assert val[i, j][p, q] == 2
+
+
+@test_utils.test()
+def test_fill_matrix_matrix():
+ val = ti.Matrix.field(2, 3, ti.i32)
+
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ for i in range(n):
+ for j in range(m):
+ for p in range(2):
+ for q in range(3):
+ val[i, j][p, q] = i + j * 3
+
+ mat = ti.Matrix([[0, 1, 2], [2, 3, 4]])
+
+ val.fill(mat)
+
+ for i in range(n):
+ for j in range(m):
+ for p in range(2):
+ for q in range(3):
+ assert val[i, j][p, q] == mat(p, q)
diff --git a/tests/_python_orig/test_for_break.py b/tests/_python_orig/test_for_break.py
new file mode 100644
index 000000000..ba858f9f0
--- /dev/null
+++ b/tests/_python_orig/test_for_break.py
@@ -0,0 +1,94 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_for_break():
+ x = ti.field(ti.i32)
+ N, M = 4, 4
+ ti.root.dense(ti.ij, (N, M)).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ for j in range(M):
+ if j > i:
+ break
+ x[i, j] = 100 * i + j
+
+ func()
+ for i in range(N):
+ for j in range(M):
+ if j > i:
+ assert x[i, j] == 0
+ else:
+ assert x[i, j] == 100 * i + j
+
+
+@test_utils.test()
+def test_for_break2():
+ x = ti.field(ti.i32)
+ N, M = 8, 8
+ ti.root.dense(ti.ij, (N, M)).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ for j in range(M):
+ x[i, j] = 100 * i + j
+ if j > i:
+ break
+
+ func()
+ for i in range(N):
+ for j in range(M):
+ if j > i + 1:
+ assert x[i, j] == 0
+ else:
+ assert x[i, j] == 100 * i + j
+
+
+@test_utils.test(exclude=ti.vulkan)
+def test_for_break3():
+ x = ti.field(ti.i32)
+ N, M = 8, 8
+ ti.root.dense(ti.ij, (N, M)).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ for j in range(i, M - i):
+ if i == 0:
+ break
+ x[i, j] = 100 * i + j
+
+ func()
+ for i in range(N):
+ for j in range(M):
+ if j < i or j >= M - i or i == 0:
+ assert x[i, j] == 0
+ else:
+ assert x[i, j] == 100 * i + j
+
+
+@test_utils.test()
+def test_for_break_complex():
+ x = ti.field(ti.i32)
+ N, M = 16, 32
+ ti.root.dense(ti.ij, (N, M)).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(1, N):
+ for j in range(3, M):
+ if j > i:
+ break
+ x[i, j] = 100 * i + j
+
+ func()
+ for i in range(N):
+ for j in range(M):
+ if i < 1 or j < 3 or j > i:
+ assert x[i, j] == 0
+ else:
+ assert x[i, j] == 100 * i + j
diff --git a/tests/_python_orig/test_for_group_mismatch.py b/tests/_python_orig/test_for_group_mismatch.py
new file mode 100644
index 000000000..e25cea450
--- /dev/null
+++ b/tests/_python_orig/test_for_group_mismatch.py
@@ -0,0 +1,100 @@
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_struct_for_mismatch():
+ x = ti.field(ti.f32, (3, 4))
+
+ @ti.kernel
+ def func():
+ for i in x:
+ print(i)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_struct_for_mismatch2():
+ x = ti.field(ti.f32, (3, 4))
+
+ @ti.kernel
+ def func():
+ for i, j, k in x:
+ print(i, j, k)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def _test_grouped_struct_for_mismatch():
+ # doesn't work for now
+ # need grouped refactor
+    # for now, it just throws an unfriendly message:
+ # AssertionError: __getitem__ cannot be called in Python-scope
+ x = ti.field(ti.f32, (3, 4))
+
+ @ti.kernel
+ def func():
+ for i, j in ti.grouped(x):
+ print(i, j)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def _test_ndrange_for_mismatch():
+ # doesn't work for now
+ # need ndrange refactor
+ @ti.kernel
+ def func():
+ for i in ti.ndrange(3, 4):
+ print(i)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def _test_ndrange_for_mismatch2():
+ # doesn't work for now
+ # need ndrange and grouped refactor
+ @ti.kernel
+ def func():
+ for i, j, k in ti.ndrange(3, 4):
+ print(i, j, k)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def _test_grouped_ndrange_for_mismatch():
+ # doesn't work for now
+ # need ndrange and grouped refactor
+ @ti.kernel
+ def func():
+ for i in ti.grouped(ti.ndrange(3, 4)):
+ print(i)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def _test_static_ndrange_for_mismatch():
+ # doesn't work for now
+ # need ndrange and static refactor
+ @ti.kernel
+ def func():
+ for i in ti.static(ti.ndrange(3, 4)):
+ print(i)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
diff --git a/tests/_python_orig/test_fp_flush_to_zero.py b/tests/_python_orig/test_fp_flush_to_zero.py
new file mode 100644
index 000000000..d3c06e4c9
--- /dev/null
+++ b/tests/_python_orig/test_fp_flush_to_zero.py
@@ -0,0 +1,32 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_ftz_f32():
+ a = ti.field(dtype=ti.f32, shape=2)
+
+ @ti.kernel
+ def foo():
+ a[0] = 1e-45
+ a[1] = 1e-10 * 1e-35
+
+ foo()
+ assert a[0] == 0
+ assert a[1] == 0
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_ftz_f64():
+ a = ti.field(dtype=ti.f64, shape=2)
+
+ @ti.kernel
+ def foo():
+ a[0] = 1e-323
+ x = 1e-300
+ y = 1e-23
+ a[1] = x * y
+
+ foo()
+ assert a[0] == 0
+ assert a[1] == 0
diff --git a/tests/_python_orig/test_function.py b/tests/_python_orig/test_function.py
new file mode 100644
index 000000000..a0101e375
--- /dev/null
+++ b/tests/_python_orig/test_function.py
@@ -0,0 +1,289 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(experimental_real_function=True)
+def test_function_without_return():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.func
+ def foo(val: ti.i32):
+ x[None] += val
+
+ @ti.kernel
+ def run():
+ foo(40)
+ foo(2)
+
+ x[None] = 0
+ run()
+ assert x[None] == 42
+
+
+@test_utils.test(experimental_real_function=True)
+def test_function_with_return():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.func
+ def foo(val: ti.i32) -> ti.i32:
+ x[None] += val
+ return val
+
+ @ti.kernel
+ def run():
+ a = foo(40)
+ foo(2)
+ assert a == 40
+
+ x[None] = 0
+ run()
+ assert x[None] == 42
+
+
+@test_utils.test(experimental_real_function=True, exclude=[ti.opengl, ti.cc])
+def test_function_with_multiple_last_return():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.func
+ def foo(val: ti.i32) -> ti.i32:
+ if x[None]:
+ x[None] += val * 2
+ return val * 2
+ else:
+ x[None] += val
+ return val
+
+ @ti.kernel
+ def run():
+ a = foo(40)
+ foo(1)
+ assert a == 40
+
+ x[None] = 0
+ run()
+ assert x[None] == 42
+
+
+@test_utils.test(experimental_real_function=True)
+def test_call_expressions():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.func
+ def foo(val: ti.i32) -> ti.i32:
+ if x[None] > 10:
+ x[None] += 1
+ x[None] += val
+ return 0
+
+ @ti.kernel
+ def run():
+ assert foo(15) == 0
+ assert foo(10) == 0
+
+ x[None] = 0
+ run()
+ assert x[None] == 26
+
+
+@test_utils.test(arch=ti.cpu, experimental_real_function=True)
+def test_failing_multiple_return():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.func
+ def foo(val: ti.i32) -> ti.i32:
+ if x[None] > 10:
+ if x[None] > 20:
+ return 1
+ x[None] += 1
+ x[None] += val
+ return 0
+
+ @ti.kernel
+ def run():
+ assert foo(15) == 0
+ assert foo(10) == 0
+ assert foo(100) == 1
+
+ with pytest.raises(AssertionError):
+ x[None] = 0
+ run()
+ assert x[None] == 26
+
+
+@test_utils.test(experimental_real_function=True)
+def test_python_function():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.func
+ def inc(val: ti.i32):
+ x[None] += val
+
+ def identity(x):
+ return x
+
+ @ti.data_oriented
+ class A:
+ def __init__(self):
+ self.count = ti.field(ti.i32, shape=())
+ self.count[None] = 0
+
+ @ti.lang.kernel_impl.pyfunc
+ def dec(self, val: ti.i32) -> ti.i32:
+ self.count[None] += 1
+ x[None] -= val
+ return self.count[None]
+
+ @ti.kernel
+ def run(self) -> ti.i32:
+ a = self.dec(1)
+ identity(2)
+ inc(identity(3))
+ return a
+
+ a = A()
+ x[None] = 0
+ assert a.run() == 1
+ assert a.run() == 2
+ assert x[None] == 4
+ assert a.dec(4) == 3
+ assert x[None] == 0
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda], debug=True)
+def test_default_templates():
+ @ti.func
+ def func1(x: ti.template()):
+ x = 1
+
+ @ti.func
+ def func2(x: ti.template()):
+ x += 1
+
+ @ti.func
+ def func3(x):
+ x = 1
+
+ @ti.func
+ def func4(x):
+ x += 1
+
+ @ti.func
+ def func1_field(x: ti.template()):
+ x[None] = 1
+
+ @ti.func
+ def func2_field(x: ti.template()):
+ x[None] += 1
+
+ @ti.func
+ def func3_field(x):
+ x[None] = 1
+
+ @ti.func
+ def func4_field(x):
+ x[None] += 1
+
+ v = ti.field(dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def run_func():
+ a = 0
+ func1(a)
+ assert a == 1
+ b = 0
+ func2(b)
+ assert b == 1
+ c = 0
+ func3(c)
+ assert c == 0
+ d = 0
+ func4(d)
+ assert d == 0
+
+ v[None] = 0
+ func1_field(v)
+ assert v[None] == 1
+ v[None] = 0
+ func2_field(v)
+ assert v[None] == 1
+ v[None] = 0
+ func3_field(v)
+ assert v[None] == 1
+ v[None] = 0
+ func4_field(v)
+ assert v[None] == 1
+
+ run_func()
+
+
+@test_utils.test(experimental_real_function=True)
+def test_experimental_templates():
+ x = ti.field(ti.i32, shape=())
+ y = ti.field(ti.i32, shape=())
+ answer = ti.field(ti.i32, shape=8)
+
+ @ti.kernel
+ def kernel_inc(x: ti.template()):
+ x[None] += 1
+
+ def run_kernel():
+ x[None] = 10
+ y[None] = 20
+ kernel_inc(x)
+ assert x[None] == 11
+ assert y[None] == 20
+ kernel_inc(y)
+ assert x[None] == 11
+ assert y[None] == 21
+
+ @ti.func
+ def inc(x: ti.template()):
+ x[None] += 1
+
+ @ti.kernel
+ def run_func():
+ x[None] = 10
+ y[None] = 20
+ inc(x)
+ answer[0] = x[None]
+ answer[1] = y[None]
+ inc(y)
+ answer[2] = x[None]
+ answer[3] = y[None]
+
+ def verify():
+ assert answer[0] == 11
+ assert answer[1] == 20
+ assert answer[2] == 11
+ assert answer[3] == 21
+
+ run_kernel()
+ run_func()
+ verify()
+
+
+@test_utils.test(experimental_real_function=True)
+def test_missing_arg_annotation():
+ with pytest.raises(ti.TaichiSyntaxError, match='must be type annotated'):
+
+ @ti.func
+ def add(a, b: ti.i32) -> ti.i32:
+ return a + b
+
+
+@test_utils.test(experimental_real_function=True)
+def test_missing_return_annotation():
+ with pytest.raises(ti.TaichiCompilationError,
+ match='return value must be annotated'):
+
+ @ti.func
+ def add(a: ti.i32, b: ti.i32):
+ return a + b
+
+ @ti.kernel
+ def run():
+ add(30, 2)
+
+ run()
diff --git a/tests/_python_orig/test_function_parameter_by_value.py b/tests/_python_orig/test_function_parameter_by_value.py
new file mode 100644
index 000000000..c27d5988c
--- /dev/null
+++ b/tests/_python_orig/test_function_parameter_by_value.py
@@ -0,0 +1,18 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_pass_by_value():
+ @ti.func
+ def set_val(x, i):
+ x = i
+
+ ret = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def task():
+ set_val(ret[None], 112)
+
+ task()
+ assert ret[None] == 0
diff --git a/tests/_python_orig/test_fuse_dense.py b/tests/_python_orig/test_fuse_dense.py
new file mode 100644
index 000000000..a08a0fe47
--- /dev/null
+++ b/tests/_python_orig/test_fuse_dense.py
@@ -0,0 +1,40 @@
+import taichi as ti
+from tests import test_utils
+
+from .fuse_test_template import (template_fuse_dense_x2y2z,
+ template_fuse_reduction)
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_fuse_dense_x2y2z():
+ template_fuse_dense_x2y2z(size=10 * 1024**2)
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_fuse_reduction():
+ template_fuse_reduction(size=10 * 1024**2)
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_no_fuse_sigs_mismatch():
+ n = 4096
+ x = ti.field(ti.i32, shape=(n, ))
+
+ @ti.kernel
+ def inc_i():
+ for i in x:
+ x[i] += i
+
+ @ti.kernel
+ def inc_by(k: ti.i32):
+ for i in x:
+ x[i] += k
+
+ repeat = 5
+ for i in range(repeat):
+ inc_i()
+ inc_by(i)
+
+ x = x.to_numpy()
+ for i in range(n):
+ assert x[i] == i * repeat + ((repeat - 1) * repeat // 2)
diff --git a/tests/_python_orig/test_fuse_dynamic.py b/tests/_python_orig/test_fuse_dynamic.py
new file mode 100644
index 000000000..e514fd05b
--- /dev/null
+++ b/tests/_python_orig/test_fuse_dynamic.py
@@ -0,0 +1,62 @@
+import time
+
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+def benchmark_fuse_dynamic_x2y2z(size=1024**2, repeat=10, first_n=100):
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ z = ti.field(ti.i32)
+
+ ti.root.dynamic(ti.i, size, chunk_size=2048).place(x, y, z)
+
+ @ti.kernel
+ def x_to_y():
+ for i in x:
+ y[i] = x[i] + 1
+
+ @ti.kernel
+ def y_to_z():
+ for i in x:
+ z[i] = y[i] + 4
+
+ first_n = min(first_n, size)
+
+ for i in range(first_n):
+ x[i] = i * 10
+
+ for _ in range(repeat):
+ t = time.time()
+ x_to_y()
+ ti.sync()
+ print('x_to_y', time.time() - t)
+ print('')
+
+ for _ in range(repeat):
+ t = time.time()
+ y_to_z()
+ ti.sync()
+ print('y_to_z', time.time() - t)
+ print('')
+
+ for _ in range(repeat):
+ t = time.time()
+ x_to_y()
+ y_to_z()
+ ti.sync()
+ print('fused x->y->z', time.time() - t)
+ print('')
+
+ for i in range(first_n):
+ assert x[i] == i * 10
+ assert y[i] == x[i] + 1
+ assert z[i] == x[i] + 5
+
+
+@test_utils.test(require=[ti.extension.async_mode, ti.extension.sparse],
+ async_mode=True)
+def test_fuse_dynamic_x2y2z():
+ benchmark_fuse_dynamic_x2y2z()
diff --git a/tests/_python_orig/test_gc.py b/tests/_python_orig/test_gc.py
new file mode 100644
index 000000000..afa050ca5
--- /dev/null
+++ b/tests/_python_orig/test_gc.py
@@ -0,0 +1,135 @@
+import taichi as ti
+from tests import test_utils
+
+
+def _test_block_gc():
+ N = 100000
+
+ dx = 1 / 128
+ inv_dx = 1.0 / dx
+
+ x = ti.Vector.field(2, dtype=ti.f32)
+
+ indices = ti.ij
+
+ grid_m = ti.field(dtype=ti.i32)
+
+ grid = ti.root.pointer(indices, 64)
+ grid.pointer(indices, 32).dense(indices, 8).place(grid_m)
+
+ ti.root.dense(ti.i, N).place(x)
+
+ @ti.kernel
+ def init():
+ for i in x:
+ x[i] = ti.Vector(
+ [ti.random() * 0.1 + 0.5,
+ ti.random() * 0.1 + 0.5])
+
+ init()
+
+ @ti.kernel
+ def build_grid():
+ for p in x:
+ base = int(ti.floor(x[p] * inv_dx - 0.5))
+ grid_m[base] += 1
+
+ @ti.kernel
+ def move():
+ for p in x:
+ x[p] += ti.Vector([0.0, 0.1])
+
+ assert grid._num_dynamically_allocated == 0
+ for _ in range(100):
+ grid.deactivate_all()
+ # Scatter the particles to the sparse grid
+ build_grid()
+ # Move the block of particles
+ move()
+
+ ti.sync()
+ # The block of particles can occupy at most two blocks on the sparse grid.
+ # It's fine to run 100 times and do just one final check, because
+ # num_dynamically_allocated stores the number of slots *ever* allocated.
+ assert 1 <= grid._num_dynamically_allocated <= 2, grid._num_dynamically_allocated
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_block():
+ _test_block_gc()
+
+
+#TODO: Remove exclude of ti.metal.
+@test_utils.test(require=[ti.extension.sparse, ti.extension.async_mode],
+ exclude=[ti.metal],
+ async_mode=True)
+def test_block_async():
+ _test_block_gc()
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dynamic_gc():
+ x = ti.field(dtype=ti.i32)
+
+ L = ti.root.dynamic(ti.i, 1024 * 1024, chunk_size=1024)
+ L.place(x)
+
+ assert L._num_dynamically_allocated == 0
+
+ for i in range(100):
+ x[1024] = 1
+ L.deactivate_all()
+ assert L._num_dynamically_allocated <= 2
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer_gc():
+ x = ti.field(dtype=ti.i32)
+
+ L = ti.root.pointer(ti.ij, 32)
+ L.pointer(ti.ij, 32).dense(ti.ij, 8).place(x)
+
+ assert L._num_dynamically_allocated == 0
+
+ for i in range(1024):
+ x[i * 8, i * 8] = 1
+ assert L._num_dynamically_allocated == 1
+ L.deactivate_all()
+
+ # Note that being inactive doesn't mean it's not allocated.
+ assert L._num_dynamically_allocated == 1
+
+
+@test_utils.test(require=[ti.extension.sparse, ti.extension.async_mode],
+ async_mode=True)
+def test_fuse_allocator_state():
+ N = 16
+ x = ti.field(dtype=ti.i32, shape=N)
+ y = ti.field(dtype=ti.i32)
+
+ y_parent = ti.root.pointer(ti.i, N * 2)
+ y_parent.place(y)
+
+ # https://github.com/taichi-dev/taichi/pull/1973#pullrequestreview-511154376
+
+ @ti.kernel
+ def activate_y():
+ for i in x:
+ idx = i + 1
+ y[idx] = idx
+
+ @ti.kernel
+ def deactivate_y():
+ for i in x:
+ ti.deactivate(y_parent, i)
+
+ activate_y()
+ deactivate_y()
+ ti.sync()
+
+ # TODO: assert that activate_y and deactivate_y are not fused.
+ assert y_parent._num_dynamically_allocated == N
+ ys = y.to_numpy()
+ for i, y in enumerate(ys):
+ expected = N if i == N else 0
+ assert y == expected
diff --git a/tests/_python_orig/test_get_external_tensor_shape.py b/tests/_python_orig/test_get_external_tensor_shape.py
new file mode 100644
index 000000000..fa5578fc0
--- /dev/null
+++ b/tests/_python_orig/test_get_external_tensor_shape.py
@@ -0,0 +1,72 @@
+import numpy as np
+import pytest
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+if has_pytorch():
+ import torch
+
+
+@pytest.mark.parametrize('size', [[1], [1, 2, 3, 4]])
+@test_utils.test()
+def test_get_external_tensor_shape_access_numpy(size):
+ @ti.kernel
+ def func(x: ti.ext_arr(), index: ti.template()) -> ti.i32:
+ return x.shape[index]
+
+ x_hat = np.ones(size, dtype=np.int32)
+ for idx, y_ref in enumerate(size):
+ y_hat = func(x_hat, idx)
+ assert y_ref == y_hat, "Size of axis {} should equal {} and not {}.".format(
+ idx, y_ref, y_hat)
+
+
+@pytest.mark.parametrize('size', [[1, 1], [2, 2]])
+@test_utils.test()
+def test_get_external_tensor_shape_sum_numpy(size):
+ @ti.kernel
+ def func(x: ti.ext_arr()) -> ti.i32:
+ y = 0
+ for i in range(x.shape[0]):
+ for j in range(x.shape[1]):
+ y += x[i, j]
+ return y
+
+ x_hat = np.ones(size, dtype=np.int32)
+ x_ref = x_hat
+ y_hat = func(x_hat)
+ y_ref = x_ref.sum()
+ assert y_ref == y_hat, "Output should equal {} and not {}.".format(
+ y_ref, y_hat)
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@pytest.mark.parametrize('size', [[1, 2, 3, 4]])
+@test_utils.test(exclude=ti.opengl)
+def test_get_external_tensor_shape_access_torch(size):
+ @ti.kernel
+ def func(x: ti.ext_arr(), index: ti.template()) -> ti.i32:
+ return x.shape[index]
+
+ x_hat = torch.ones(size, dtype=torch.int32, device='cpu')
+ for idx, y_ref in enumerate(size):
+ y_hat = func(x_hat, idx)
+ assert y_ref == y_hat, "Size of axis {} should equal {} and not {}.".format(
+ idx, y_ref, y_hat)
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@pytest.mark.parametrize('size', [[1, 2, 3, 4]])
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.opengl])
+def test_get_external_tensor_shape_access_ndarray(size):
+ @ti.kernel
+ def func(x: ti.any_arr(), index: ti.template()) -> ti.i32:
+ return x.shape[index]
+
+ x_hat = ti.ndarray(ti.i32, shape=size)
+ for idx, y_ref in enumerate(size):
+ y_hat = func(x_hat, idx)
+ assert y_ref == y_hat, "Size of axis {} should equal {} and not {}.".format(
+ idx, y_ref, y_hat)
diff --git a/tests/_python_orig/test_ggui.py b/tests/_python_orig/test_ggui.py
new file mode 100644
index 000000000..1865cd830
--- /dev/null
+++ b/tests/_python_orig/test_ggui.py
@@ -0,0 +1,291 @@
+import os
+import pathlib
+import platform
+import tempfile
+
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+REGENERATE_GROUNDTRUTH_IMAGES = False
+RENDER_REPEAT = 5
+supported_archs = [ti.vulkan, ti.cuda]
+
+
+def get_temp_png():
+ f, name = tempfile.mkstemp(suffix='.png')
+ os.close(f)
+ return name
+
+
+def write_temp_image(window):
+ f = get_temp_png()
+ window.write_image(f)
+ try:
+ os.remove(f)
+ except OSError:
+ pass
+
+
+def verify_image(window, image_name, tolerence=0.1):
+ if REGENERATE_GROUNDTRUTH_IMAGES:
+ ground_truth_name = f"tests/python/expected/{image_name}.png"
+ window.write_image(ground_truth_name)
+ else:
+ ground_truth_name = str(
+ pathlib.Path(__file__).parent) + f"/expected/{image_name}.png"
+ actual_name = get_temp_png()
+ window.write_image(actual_name)
+ ground_truth_np = ti.imread(ground_truth_name)
+ actual_np = ti.imread(actual_name)
+ assert len(ground_truth_np.shape) == len(actual_np.shape)
+ for i in range(len(ground_truth_np.shape)):
+ assert ground_truth_np.shape[i] == actual_np.shape[i]
+ diff = ground_truth_np - actual_np
+ mse = np.mean(diff * diff)
+ assert mse <= tolerence # the pixel values are 0~255
+ os.remove(actual_name)
+
+
+@pytest.mark.skipif(not ti.ui.GGUI_AVAILABLE, reason="GGUI Not Available")
+@test_utils.test(arch=supported_archs)
+def test_geometry_2d():
+ window = ti.ui.Window('test', (640, 480), show_window=False)
+ canvas = window.get_canvas()
+
+ # simple circles
+ n_circles_0 = 10
+ circle_positions_0 = ti.Vector.field(2, ti.f32, shape=n_circles_0)
+ for i in range(n_circles_0):
+ circle_positions_0[i] = ti.Vector([0.1, i * 0.1])
+
+ # circles with per vertex colors
+ n_circles_1 = 10
+ circle_positions_1 = ti.Vector.field(2, ti.f32, shape=n_circles_1)
+ circle_colors_1 = ti.Vector.field(3, ti.f32, shape=n_circles_1)
+ for i in range(n_circles_0):
+ circle_positions_1[i] = ti.Vector([0.2, i * 0.1])
+ circle_colors_1[i] = ti.Vector([i * 0.1, 1.0 - i * 0.1, 0.5])
+
+ # simple triangles
+ n_triangles_0 = 10
+ triangles_positions_0 = ti.Vector.field(2, ti.f32, shape=3 * n_triangles_0)
+ for i in range(n_triangles_0):
+ triangles_positions_0[3 * i] = ti.Vector([0.3, i * 0.1])
+ triangles_positions_0[3 * i + 1] = ti.Vector([0.35, i * 0.1])
+ triangles_positions_0[3 * i + 2] = ti.Vector([0.35, i * 0.1 + 0.05])
+
+ # triangles with per vertex colors and indices
+ triangles_positions_1 = ti.Vector.field(2, ti.f32, shape=4)
+ triangles_colors_1 = ti.Vector.field(3, ti.f32, shape=4)
+ triangles_positions_1[0] = ti.Vector([0.4, 0])
+ triangles_positions_1[1] = ti.Vector([0.4, 1])
+ triangles_positions_1[2] = ti.Vector([0.45, 0])
+ triangles_positions_1[3] = ti.Vector([0.45, 1])
+ triangles_colors_1[0] = ti.Vector([0, 0, 0])
+ triangles_colors_1[1] = ti.Vector([1, 0, 0])
+ triangles_colors_1[2] = ti.Vector([0, 1, 0])
+ triangles_colors_1[3] = ti.Vector([1, 1, 0])
+ triangle_indices_1 = ti.Vector.field(3, ti.i32, shape=2)
+ triangle_indices_1[0] = ti.Vector([0, 1, 3])
+ triangle_indices_1[1] = ti.Vector([0, 2, 3])
+
+ # simple lines
+ n_lines_0 = 10
+ lines_positions_0 = ti.Vector.field(2, ti.f32, shape=2 * n_lines_0)
+ for i in range(n_lines_0):
+ lines_positions_0[2 * i] = ti.Vector([0.5, i * 0.1])
+ lines_positions_0[2 * i + 1] = ti.Vector([0.5, i * 0.1 + 0.05])
+
+ # lines with per vertex colors and indices
+ lines_positions_1 = ti.Vector.field(2, ti.f32, shape=4)
+ lines_colors_1 = ti.Vector.field(3, ti.f32, shape=4)
+ lines_positions_1[0] = ti.Vector([0.6, 0])
+ lines_positions_1[1] = ti.Vector([0.6, 1])
+ lines_positions_1[2] = ti.Vector([0.65, 0])
+ lines_positions_1[3] = ti.Vector([0.65, 1])
+ lines_colors_1[0] = ti.Vector([0, 0, 0])
+ lines_colors_1[1] = ti.Vector([1, 0, 0])
+ lines_colors_1[2] = ti.Vector([0, 1, 0])
+ lines_colors_1[3] = ti.Vector([1, 1, 0])
+ lines_indices_1 = ti.Vector.field(2, ti.i32, shape=6)
+ line_id = 0
+ for i in range(4):
+ for j in range(i + 1, 4):
+ lines_indices_1[line_id] = ti.Vector([i, j])
+ line_id += 1
+
+ def render():
+
+ canvas.circles(circle_positions_0, radius=0.05, color=(1, 0, 0))
+
+ canvas.circles(circle_positions_1,
+ radius=0.05,
+ per_vertex_color=circle_colors_1)
+
+ canvas.triangles(triangles_positions_0, color=(0, 0, 1))
+
+ canvas.triangles(triangles_positions_1,
+ per_vertex_color=triangles_colors_1,
+ indices=triangle_indices_1)
+
+ canvas.lines(lines_positions_0, width=0.01, color=(0, 1, 0))
+
+ canvas.lines(lines_positions_1,
+ width=0.01,
+ per_vertex_color=lines_colors_1,
+ indices=lines_indices_1)
+
+ for _ in range(RENDER_REPEAT):
+ render()
+ write_temp_image(window)
+ render()
+ if (platform.system() == 'Darwin'):
+        # FIXME: Use lower tolerance when macOS ggui supports wide lines
+ verify_image(window, 'test_geometry_2d', 1.0)
+ else:
+ verify_image(window, 'test_geometry_2d')
+ window.destroy()
+
+
+@pytest.mark.skipif(not ti.ui.GGUI_AVAILABLE, reason="GGUI Not Available")
+@test_utils.test(arch=supported_archs)
+def test_geometry_3d():
+ window = ti.ui.Window('test', (640, 480), show_window=False)
+ canvas = window.get_canvas()
+ scene = ti.ui.Scene()
+ camera = ti.ui.make_camera()
+ camera.position(0.0, 0.0, 1.5)
+ camera.lookat(0.0, 0.0, 0)
+ scene.set_camera(camera)
+
+ # simple particles
+ num_per_dim = 32
+ num_particles_0 = int(num_per_dim**3)
+ particles_positions_0 = ti.Vector.field(3, ti.f32, shape=num_particles_0)
+
+ @ti.kernel
+ def init_particles_0():
+ for x, y, z in ti.ndrange(num_per_dim, num_per_dim, num_per_dim):
+ i = x * (num_per_dim**2) + y * num_per_dim + z
+ gap = 0.01
+ particles_positions_0[i] = ti.Vector(
+ [-0.4, 0, 0.0],
+ dt=ti.f32) + ti.Vector([x, y, z], dt=ti.f32) * gap
+
+ init_particles_0()
+
+ # particles with individual colors
+ num_per_dim = 32
+ num_particles_1 = int(num_per_dim**3)
+ particles_positions_1 = ti.Vector.field(3, ti.f32, shape=num_particles_1)
+ particles_colors_1 = ti.Vector.field(3, ti.f32, shape=num_particles_1)
+
+ @ti.kernel
+ def init_particles_1():
+ for x, y, z in ti.ndrange(num_per_dim, num_per_dim, num_per_dim):
+ i = x * (num_per_dim**2) + y * num_per_dim + z
+ gap = 0.01
+ particles_positions_1[i] = ti.Vector(
+ [0.2, 0, 0.0],
+ dt=ti.f32) + ti.Vector([x, y, z], dt=ti.f32) * gap
+ particles_colors_1[i] = ti.Vector([x, y, z],
+ dt=ti.f32) / num_per_dim
+
+ init_particles_1()
+
+ # mesh
+ vertices = ti.Vector.field(3, ti.f32, shape=8)
+ colors = ti.Vector.field(3, ti.f32, shape=8)
+
+ @ti.kernel
+ def init_mesh():
+ for i, j, k in ti.ndrange(2, 2, 2):
+ index = i * 4 + j * 2 + k
+ vertices[index] = ti.Vector(
+ [-0.1, -0.3, 0.0],
+ dt=ti.f32) + ti.Vector([i, j, k], dt=ti.f32) * 0.25
+ colors[index] = ti.Vector([i, j, k], dt=ti.f32)
+
+ init_mesh()
+ indices = ti.field(ti.i32, shape=36)
+ indices_np = np.array([
+ 0, 1, 2, 3, 1, 2, 4, 5, 6, 7, 5, 6, 0, 1, 4, 5, 1, 4, 2, 3, 6, 7, 3, 6,
+ 0, 2, 4, 6, 2, 4, 1, 3, 5, 7, 3, 5
+ ],
+ dtype=np.int32)
+ indices.from_numpy(indices_np)
+
+ def render():
+ scene.point_light(pos=(2, 2, 2), color=(1, 1, 1))
+
+ scene.particles(particles_positions_0, radius=0.01, color=(0.5, 0, 0))
+
+ scene.particles(particles_positions_1,
+ radius=0.01,
+ per_vertex_color=particles_colors_1)
+
+ scene.mesh(vertices,
+ per_vertex_color=colors,
+ indices=indices,
+ two_sided=True)
+
+ canvas.scene(scene)
+
+ for _ in range(RENDER_REPEAT):
+ render()
+ write_temp_image(window)
+ render()
+ verify_image(window, 'test_geometry_3d')
+ window.destroy()
+
+
+@pytest.mark.skipif(not ti.ui.GGUI_AVAILABLE, reason="GGUI Not Available")
+@test_utils.test(arch=supported_archs)
+def test_set_image():
+ window = ti.ui.Window('test', (640, 480), show_window=False)
+ canvas = window.get_canvas()
+
+ img = ti.Vector.field(4, ti.f32, (512, 512))
+
+ @ti.kernel
+ def init_img():
+ for i, j in img:
+ img[i, j] = ti.Vector([i, j, 0, 512], dt=ti.f32) / 512
+
+ init_img()
+
+ def render():
+ canvas.set_image(img)
+
+ for _ in range(RENDER_REPEAT):
+ render()
+ write_temp_image(window)
+ render()
+ verify_image(window, 'test_set_image')
+ window.destroy()
+
+
+@pytest.mark.skipif(not ti.ui.GGUI_AVAILABLE, reason="GGUI Not Available")
+@test_utils.test(arch=supported_archs)
+def test_imgui():
+ window = ti.ui.Window('test', (640, 480), show_window=False)
+
+ def render():
+ with window.GUI.sub_window("window 0", 0.1, 0.1, 0.8, 0.2) as w:
+ w.text("Hello Taichi!")
+ w.text("Hello Again!")
+ with window.GUI.sub_window("window 1", 0.1, 0.4, 0.8, 0.2) as w:
+ w.button("Press to unlease creativity")
+ w.slider_float('creativity level', 100.0, 0.0, 100.0)
+ with window.GUI.sub_window("window 2", 0.1, 0.7, 0.8, 0.2) as w:
+ w.color_edit_3('Heyy', (0, 0, 1))
+
+ for _ in range(RENDER_REPEAT):
+ render()
+ write_temp_image(window)
+ render()
+ verify_image(window, 'test_imgui')
+ window.destroy()
diff --git a/tests/_python_orig/test_global_buffer_misalined.py b/tests/_python_orig/test_global_buffer_misalined.py
new file mode 100644
index 000000000..eab4524d2
--- /dev/null
+++ b/tests/_python_orig/test_global_buffer_misalined.py
@@ -0,0 +1,15 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_global_buffer_misalignment():
+ @ti.kernel
+ def test(x: ti.f32):
+ a = x
+ b = ti.cast(0.12, ti.f64)
+ for i in range(8):
+ b += a
+
+ for i in range(8):
+ test(0.1)
diff --git a/tests/_python_orig/test_global_store_grad.py b/tests/_python_orig/test_global_store_grad.py
new file mode 100644
index 000000000..6f98e9f01
--- /dev/null
+++ b/tests/_python_orig/test_global_store_grad.py
@@ -0,0 +1,32 @@
+"""
+import taichi as ti
+
+ti.lang.impl.current_cfg().print_ir = True
+
+
+def test_global_store_branching():
+ # ti.reset()
+
+ N = 16
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, N).place(x)
+ ti.root.dense(ti.i, N).place(y)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def oldeven():
+ for i in range(N):
+ if i % 2 == 0:
+ x[i] = y[i]
+
+ for i in range(N):
+ x.grad[i] = 1
+
+ oldeven()
+ oldeven.grad()
+
+ for i in range(N):
+ assert y.grad[i] == (i % 2 == 0)
+"""
diff --git a/tests/_python_orig/test_global_thread_idx.py b/tests/_python_orig/test_global_thread_idx.py
new file mode 100644
index 000000000..6e0d58690
--- /dev/null
+++ b/tests/_python_orig/test_global_thread_idx.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cuda)
+def test_global_thread_idx():
+ n = 2048
+ x = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ tid = ti.global_thread_idx()
+ x[tid] = tid
+
+ func()
+ assert np.arange(n).sum() == x.to_numpy().sum()
diff --git a/tests/_python_orig/test_grouped.py b/tests/_python_orig/test_grouped.py
new file mode 100644
index 000000000..0757c7f92
--- /dev/null
+++ b/tests/_python_orig/test_grouped.py
@@ -0,0 +1,182 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_vector_index():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 7
+ p = 11
+
+ ti.root.dense(ti.i, n).dense(ti.j, m).dense(ti.k, p).place(val)
+
+ @ti.kernel
+ def test():
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ I = ti.Vector([i, j, k])
+ val[I] = i + j * 2 + k * 3
+
+ test()
+
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ assert val[i, j, k] == i + j * 2 + k * 3
+
+
+@test_utils.test()
+def test_grouped():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 8
+ p = 16
+
+ ti.root.dense(ti.i, n).dense(ti.j, m).dense(ti.k, p).place(val)
+
+ @ti.kernel
+ def test():
+ for I in ti.grouped(val):
+ val[I] = I[0] + I[1] * 2 + I[2] * 3
+
+ test()
+
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ assert val[i, j, k] == i + j * 2 + k * 3
+
+
+@test_utils.test()
+def test_grouped_ndrange():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 8
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ x0 = 2
+ y0 = 3
+ x1 = 1
+ y1 = 6
+
+ @ti.kernel
+ def test():
+ for I in ti.grouped(ti.ndrange((x0, y0), (x1, y1))):
+ val[I] = I[0] + I[1] * 2
+
+ test()
+
+ for i in range(n):
+ for j in range(m):
+ assert val[i, j] == (i +
+ j * 2 if x0 <= i < y0 and x1 <= j < y1 else 0)
+
+
+@test_utils.test()
+def test_static_grouped_ndrange():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 8
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ x0 = 2
+ y0 = 3
+ x1 = 1
+ y1 = 6
+
+ @ti.kernel
+ def test():
+ for I in ti.static(ti.grouped(ti.ndrange((x0, y0), (x1, y1)))):
+ val[I] = I[0] + I[1] * 2
+
+ test()
+
+ for i in range(n):
+ for j in range(m):
+ assert val[i, j] == (i +
+ j * 2 if x0 <= i < y0 and x1 <= j < y1 else 0)
+
+
+@test_utils.test()
+def test_grouped_ndrange_starred():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 8
+ p = 16
+ dim = 3
+
+ ti.root.dense(ti.ijk, (n, m, p)).place(val)
+
+ @ti.kernel
+ def test():
+ for I in ti.grouped(ti.ndrange(*(((0, n), ) * dim))):
+ val[I] = I[0] + I[1] * 2 + I[2] * 3
+
+ test()
+
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ assert val[i, j,
+ k] == (i + j * 2 + k * 3 if j < n and k < n else 0)
+
+
+@test_utils.test()
+def test_grouped_ndrange_0d():
+ val = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def test():
+ for I in ti.grouped(ti.ndrange()):
+ val[I] = 42
+
+ test()
+
+ assert val[None] == 42
+
+
+@test_utils.test()
+def test_static_grouped_ndrange_0d():
+ val = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def test():
+ for I in ti.static(ti.grouped(ti.ndrange())):
+ val[I] = 42
+
+ test()
+
+ assert val[None] == 42
+
+
+@test_utils.test()
+def test_static_grouped_func():
+
+ K = 3
+ dim = 2
+
+ v = ti.Vector.field(K, dtype=ti.i32, shape=((K, ) * dim))
+
+ def stencil_range():
+ return ti.ndrange(*((K, ) * (dim + 1)))
+
+ @ti.kernel
+ def p2g():
+ for I in ti.static(ti.grouped(stencil_range())):
+ v[I[0], I[1]][I[2]] = I[0] + I[1] * 3 + I[2] * 10
+
+ p2g()
+
+ for i in range(K):
+ for j in range(K):
+ for k in range(K):
+ assert v[i, j][k] == i + j * 3 + k * 10
diff --git a/tests/_python_orig/test_gui.py b/tests/_python_orig/test_gui.py
new file mode 100644
index 000000000..dbb0bae7a
--- /dev/null
+++ b/tests/_python_orig/test_gui.py
@@ -0,0 +1,32 @@
+import numpy as np
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@pytest.mark.parametrize('dtype', [ti.u8, ti.f32])
+@test_utils.test(arch=get_host_arch_list())
+def test_save_image_without_window(dtype):
+ n = 255
+ pixels = ti.field(dtype=dtype, shape=(n, n, 3))
+
+ @ti.kernel
+ def paint(c: dtype):
+ for i, j, k in pixels:
+ pixels[i, j, k] = c
+
+ gui = ti.GUI("Test", res=(n, n), show_gui=False)
+ for i in [0, 32, 64, 128, 255]:
+ if dtype is ti.u8:
+ paint(i)
+ else:
+ paint(i * 1.0 / n)
+ gui.set_image(pixels)
+ image_path = test_utils.make_temp_file(suffix='.png')
+ gui.show(image_path)
+ image = ti.imread(image_path)
+ delta = (image - i).sum()
+ assert delta == 0, "Expected image difference to be 0 but got {} instead.".format(
+ delta)
diff --git a/tests/_python_orig/test_image_io.py b/tests/_python_orig/test_image_io.py
new file mode 100644
index 000000000..fd04ea19b
--- /dev/null
+++ b/tests/_python_orig/test_image_io.py
@@ -0,0 +1,91 @@
+import os
+
+import numpy as np
+import pytest
+from taichi.lang.misc import get_host_arch_list
+from taichi.lang.util import to_numpy_type
+
+import taichi as ti
+from tests import test_utils
+
+
+# jpg is also supported but hard to test here since it's lossy:
+@pytest.mark.parametrize('comp,ext', [(3, 'bmp'), (1, 'png'), (3, 'png'),
+ (4, 'png')])
+@pytest.mark.parametrize('resx,resy', [(201, 173)])
+@pytest.mark.parametrize('is_field', [False, True])
+@pytest.mark.parametrize('dt', [ti.u8])
+@test_utils.test(arch=get_host_arch_list())
+def test_image_io(resx, resy, comp, ext, is_field, dt):
+ if comp != 1:
+ shape = (resx, resy, comp)
+ else:
+ shape = (resx, resy)
+ if is_field:
+ pixel_t = ti.field(dt, shape)
+ pixel = np.random.randint(256, size=shape, dtype=to_numpy_type(dt))
+ if is_field:
+ pixel_t.from_numpy(pixel)
+ fn = test_utils.make_temp_file(suffix='.' + ext)
+ if is_field:
+ ti.imwrite(pixel_t, fn)
+ else:
+ ti.imwrite(pixel, fn)
+ pixel_r = ti.imread(fn)
+ if comp == 1:
+ # from (resx, resy, 1) to (resx, resy)
+ pixel_r = pixel_r.reshape((resx, resy))
+ assert (pixel_r == pixel).all()
+ os.remove(fn)
+
+
+@pytest.mark.parametrize('comp,ext', [(3, 'png'), (4, 'png')])
+@pytest.mark.parametrize('resx,resy', [(91, 81)])
+@pytest.mark.parametrize('dt', [ti.f32, ti.f64])
+@test_utils.test(arch=get_host_arch_list())
+def test_image_io_vector(resx, resy, comp, ext, dt):
+ shape = (resx, resy)
+ pixel = np.random.rand(*shape, comp).astype(to_numpy_type(dt))
+ pixel_t = ti.Vector.field(comp, dt, shape)
+ pixel_t.from_numpy(pixel)
+ fn = test_utils.make_temp_file(suffix='.' + ext)
+ ti.imwrite(pixel_t, fn)
+ pixel_r = (ti.imread(fn).astype(to_numpy_type(dt)) + 0.5) / 256.0
+ assert np.allclose(pixel_r, pixel, atol=2e-2)
+ os.remove(fn)
+
+
+@pytest.mark.parametrize('comp,ext', [(3, 'png')])
+@pytest.mark.parametrize('resx,resy', [(91, 81)])
+@pytest.mark.parametrize('dt', [ti.u16, ti.u32, ti.u64])
+@test_utils.test(arch=get_host_arch_list())
+def test_image_io_uint(resx, resy, comp, ext, dt):
+ shape = (resx, resy)
+ np_type = to_numpy_type(dt)
+ # When saving to disk, pixel data will be truncated into 8 bits.
+ # Be careful here if you want lossless saving.
+ np_max = np.iinfo(np_type).max // 256
+ pixel = np.random.randint(256, size=(*shape, comp), dtype=np_type) * np_max
+ pixel_t = ti.Vector.field(comp, dt, shape)
+ pixel_t.from_numpy(pixel)
+ fn = test_utils.make_temp_file(suffix='.' + ext)
+ ti.imwrite(pixel_t, fn)
+ pixel_r = ti.imread(fn).astype(np_type) * np_max
+ assert (pixel_r == pixel).all()
+ os.remove(fn)
+
+
+@pytest.mark.parametrize('comp', [1, 3])
+@pytest.mark.parametrize('resx,resy', [(91, 81)])
+@pytest.mark.parametrize('scale', [1, 2, 3])
+@test_utils.test(arch=get_host_arch_list())
+def test_image_resize_sum(resx, resy, comp, scale):
+ shape = (resx, resy)
+ if comp != 1:
+ shape = shape + (comp, )
+ old_img = np.random.rand(*shape).astype(np.float32)
+ if resx == resy:
+ new_img = ti.imresize(old_img, resx * scale)
+ else:
+ new_img = ti.imresize(old_img, resx * scale, resy * scale)
+ assert np.sum(old_img) * scale**2 == test_utils.approx(np.sum(new_img))
diff --git a/tests/_python_orig/test_immediate_layout.py b/tests/_python_orig/test_immediate_layout.py
new file mode 100644
index 000000000..065b3b04c
--- /dev/null
+++ b/tests/_python_orig/test_immediate_layout.py
@@ -0,0 +1,13 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_1D():
+ N = 2
+ x = ti.field(ti.f32)
+ ti.root.dense(ti.i, N).place(x)
+
+ x[0] = 42
+ assert x[0] == 42
+ assert x[1] == 0
diff --git a/tests/_python_orig/test_indices.py b/tests/_python_orig/test_indices.py
new file mode 100644
index 000000000..fd676fae9
--- /dev/null
+++ b/tests/_python_orig/test_indices.py
@@ -0,0 +1,56 @@
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_indices():
+ a = ti.field(ti.f32, shape=(128, 32, 8))
+
+ b = ti.field(ti.f32)
+ ti.root.dense(ti.j, 32).dense(ti.i, 16).place(b)
+
+ mapping_a = a.snode._physical_index_position()
+
+ assert mapping_a == {0: 0, 1: 1, 2: 2}
+
+ mapping_b = b.snode._physical_index_position()
+
+ assert mapping_b == {0: 0, 1: 1}
+ # Note that b is column-major:
+ # the virtual first index exposed to the user comes second in memory layout.
+
+ @ti.kernel
+ def fill():
+ for i, j in b:
+ b[i, j] = i * 10 + j
+
+ @ti.kernel
+ def get_field_addr(i: ti.i32, j: ti.i32) -> ti.u64:
+ return ti.get_addr(b, [i, j])
+
+ fill()
+ for i in range(16):
+ for j in range(32):
+ assert b[i, j] == i * 10 + j
+ assert get_field_addr(0, 1) + 4 == get_field_addr(1, 1)
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_float_as_index():
+ a = ti.field(ti.f32, (8, 5))
+
+ @ti.kernel
+ def func():
+ i = 6.66
+ j = 3
+ I = ti.Vector([2, 1])
+ for _ in range(1): # prevent constant fold
+ a[i, j] = 233
+ a[I + ti.Vector([1, 3.0])] = 666
+
+ func()
+
+ assert a[6, 3] == 233
+ assert a[3, 4] == 666
diff --git a/tests/_python_orig/test_indices_assert.py b/tests/_python_orig/test_indices_assert.py
new file mode 100644
index 000000000..2be507fd8
--- /dev/null
+++ b/tests/_python_orig/test_indices_assert.py
@@ -0,0 +1,23 @@
+import platform
+
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@pytest.mark.skipif(platform.system() == 'Windows',
+ reason="Too much virtual memory for github windows env.")
+@test_utils.test(debug=True, gdb_trigger=False, packed=False, arch=[ti.cpu])
+def test_indices_assert():
+
+ overflow = ti.field(ti.i32, (334, 334, 334, 2 * 10))
+
+ @ti.kernel
+ def access_overflow():
+ overflow[0, 0, 0, 0] = 10
+ print(overflow[333, 333, 333, 0])
+
+ with pytest.raises(RuntimeError,
+ match='The indices provided are too big!'):
+ access_overflow()
diff --git a/tests/_python_orig/test_internal_func.py b/tests/_python_orig/test_internal_func.py
new file mode 100644
index 000000000..78dcd8eef
--- /dev/null
+++ b/tests/_python_orig/test_internal_func.py
@@ -0,0 +1,69 @@
+import time
+
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.metal, ti.opengl, ti.cuda, ti.vulkan, ti.cc])
+def test_basic():
+ @ti.kernel
+ def test():
+ for _ in range(10):
+ impl.call_internal("do_nothing")
+
+ test()
+
+
+@test_utils.test(exclude=[ti.metal, ti.opengl, ti.cuda, ti.vulkan, ti.cc])
+def test_host_polling():
+ return
+
+ @ti.kernel
+ def test():
+ impl.call_internal("refresh_counter")
+
+ for i in range(10):
+ print('updating tail to', i)
+ test()
+ time.sleep(0.1)
+
+
+@test_utils.test(exclude=[ti.metal, ti.opengl, ti.cuda, ti.vulkan, ti.cc])
+def test_list_manager():
+ @ti.kernel
+ def test():
+ impl.call_internal("test_list_manager")
+
+ test()
+ test()
+
+
+@test_utils.test(exclude=[ti.metal, ti.opengl, ti.cuda, ti.vulkan, ti.cc])
+def test_node_manager():
+ @ti.kernel
+ def test():
+ impl.call_internal("test_node_allocator")
+
+ test()
+ test()
+
+
+@test_utils.test(exclude=[ti.metal, ti.opengl, ti.cuda, ti.vulkan, ti.cc])
+def test_node_manager_gc():
+ @ti.kernel
+ def test_cpu():
+ impl.call_internal("test_node_allocator_gc_cpu")
+
+ test_cpu()
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda], debug=True)
+def test_return():
+ @ti.kernel
+ def test_cpu():
+ ret = impl.call_internal("test_internal_func_args", 1.0, 2.0, 3)
+ assert ret == 9
+
+ test_cpu()
diff --git a/tests/_python_orig/test_kernel_arg_errors.py b/tests/_python_orig/test_kernel_arg_errors.py
new file mode 100644
index 000000000..2e0797795
--- /dev/null
+++ b/tests/_python_orig/test_kernel_arg_errors.py
@@ -0,0 +1,17 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cpu)
+def test_pass_float_as_i32():
+ @ti.kernel
+ def foo(a: ti.i32):
+ pass
+
+ with pytest.raises(ti.TaichiRuntimeTypeError) as e:
+ foo(1.2)
+
+ assert e.value.args[
+ 0] == "Argument 0 (type=) cannot be converted into required type i32"
diff --git a/tests/_python_orig/test_kernel_templates.py b/tests/_python_orig/test_kernel_templates.py
new file mode 100644
index 000000000..09637436c
--- /dev/null
+++ b/tests/_python_orig/test_kernel_templates.py
@@ -0,0 +1,129 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_kernel_template_basic():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.f32)
+
+ n = 16
+
+ ti.root.dense(ti.i, n).place(x, y)
+
+ @ti.kernel
+ def inc(a: ti.template(), b: ti.template()):
+ for i in a:
+ a[i] += b
+
+ inc(x, 1)
+ inc(y, 2)
+
+ for i in range(n):
+ assert x[i] == 1
+ assert y[i] == 2
+
+ @ti.kernel
+ def inc2(z: ti.i32, a: ti.template(), b: ti.i32):
+ for i in a:
+ a[i] += b + z
+
+ inc2(10, x, 1)
+ for i in range(n):
+ assert x[i] == 12
+
+
+@test_utils.test()
+def test_kernel_template_gradient():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+ z = ti.field(ti.f32)
+ loss = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 16).place(x, y, z)
+ ti.root.place(loss)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def double(a: ti.template(), b: ti.template()):
+ for i in range(16):
+ b[i] = a[i] * 2 + 1
+
+ @ti.kernel
+ def compute_loss():
+ for i in range(16):
+ ti.atomic_add(loss[None], z[i])
+
+ for i in range(16):
+ x[i] = i
+
+ with ti.Tape(loss):
+ double(x, y)
+ double(y, z)
+ compute_loss()
+
+ for i in range(16):
+ assert z[i] == i * 4 + 3
+ assert x.grad[i] == 4
+
+
+@test_utils.test()
+def test_func_template():
+ a = [ti.field(dtype=ti.f32) for _ in range(2)]
+ b = [ti.field(dtype=ti.f32) for _ in range(2)]
+
+ for l in range(2):
+ ti.root.dense(ti.ij, 16).place(a[l], b[l])
+
+ @ti.func
+ def sample(x: ti.template(), l: ti.template(), I):
+ return x[l][I]
+
+ @ti.kernel
+ def fill(l: ti.template()):
+ for I in ti.grouped(a[l]):
+ a[l][I] = l
+
+ @ti.kernel
+ def aTob(l: ti.template()):
+ for I in ti.grouped(b[l]):
+ b[l][I] = sample(a, l, I)
+
+ for l in range(2):
+ fill(l)
+ aTob(l)
+
+ for l in range(2):
+ for i in range(16):
+ for j in range(16):
+ assert b[l][i, j] == l
+
+
+@test_utils.test()
+def test_func_template2():
+ a = ti.field(dtype=ti.f32)
+ b = ti.field(dtype=ti.f32)
+
+ ti.root.dense(ti.ij, 16).place(a, b)
+
+ @ti.func
+ def sample(x: ti.template(), I):
+ return x[I]
+
+ @ti.kernel
+ def fill():
+ for I in ti.grouped(a):
+ a[I] = 1.0
+
+ @ti.kernel
+ def aTob():
+ for I in ti.grouped(b):
+ b[I] = sample(a, I)
+
+ for l in range(2):
+ fill()
+ aTob()
+
+ for i in range(16):
+ for j in range(16):
+ assert b[i, j] == 1.0
diff --git a/tests/_python_orig/test_lang.py b/tests/_python_orig/test_lang.py
new file mode 100644
index 000000000..1ce3dcde8
--- /dev/null
+++ b/tests/_python_orig/test_lang.py
@@ -0,0 +1,162 @@
+import numpy as np
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_nested_subscript():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ ti.root.dense(ti.i, 1).place(x)
+ ti.root.dense(ti.i, 1).place(y)
+
+ x[0] = 0
+
+ @ti.kernel
+ def inc():
+ for i in range(1):
+ x[x[i]] += 1
+
+ inc()
+
+ assert x[0] == 1
+
+
+@test_utils.test()
+def test_norm():
+ val = ti.field(ti.i32)
+ f = ti.field(ti.f32)
+
+ n = 1024
+
+ ti.root.dense(ti.i, n).dense(ti.i, 2).place(val, f)
+
+ @ti.kernel
+ def test():
+ for i in range(n):
+ s = 0
+ for j in range(10):
+ s += j
+ a = ti.Vector([0.4, 0.3])
+ val[i] = s + ti.cast(a.norm() * 100, ti.i32) + i
+
+ test()
+
+ @ti.kernel
+ def test2():
+ for i in range(n):
+ val[i] += 1
+
+ test2()
+
+ for i in range(n):
+ assert val[i] == 96 + i
+
+
+@test_utils.test()
+def test_simple2():
+ val = ti.field(ti.i32)
+ f = ti.field(ti.f32)
+
+ n = 16
+
+ ti.root.dense(ti.i, n).place(val, f)
+
+ @ti.kernel
+ def test():
+ for i in range(n):
+ val[i] = i * 2
+
+ test()
+
+ @ti.kernel
+ def test2():
+ for i in range(n):
+ val[i] += 1
+
+ test2()
+
+ for i in range(n):
+ assert val[i] == 1 + i * 2
+
+
+@test_utils.test()
+def test_recreate():
+ @ti.kernel
+ def test():
+ a = 0
+ a, b = 1, 2
+
+ test()
+
+
+@test_utils.test()
+def test_local_atomics():
+ n = 32
+ val = ti.field(ti.i32, shape=n)
+
+ @ti.kernel
+ def test():
+ for i in range(n):
+ s = 0
+ s += 45
+ print(s)
+ val[i] = s + i
+ print(val[i])
+
+ test()
+
+ for i in range(n):
+ assert val[i] == i + 45
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_loop_var_life():
+ @ti.kernel
+ def test():
+ for i in ti.static(range(8)):
+ pass
+ print(i)
+
+ with pytest.raises(Exception):
+ test()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_loop_var_life_double_iters():
+ @ti.kernel
+ def test():
+ for i, v in ti.static(enumerate(range(8))):
+ pass
+ print(i)
+
+ with pytest.raises(Exception):
+ test()
+
+
+@pytest.mark.parametrize('dtype', [ti.i32, ti.f32, ti.i64, ti.f64])
+@pytest.mark.parametrize('ti_zero,zero', [(ti.zero, 0), (ti.one, 1)])
+@pytest.mark.parametrize('is_mat', [False, True])
+@test_utils.test(arch=ti.cpu)
+def test_meta_zero_one(dtype, ti_zero, zero, is_mat):
+ if is_mat:
+ x = ti.Matrix.field(2, 3, dtype, ())
+ y = ti.Matrix.field(2, 3, dtype, ())
+ else:
+ x = ti.field(dtype, ())
+ y = ti.field(dtype, ())
+
+ @ti.kernel
+ def func():
+ y[None] = ti_zero(x[None])
+
+ for a in [-1, -2.3, -1, -0.3, 0, 1, 1.9, 2, 3]:
+ if ti.types.is_integral(dtype):
+ a = int(a)
+ x.fill(a)
+ func()
+ assert np.all(y.to_numpy() == zero)
diff --git a/tests/_python_orig/test_lexical_scope.py b/tests/_python_orig/test_lexical_scope.py
new file mode 100644
index 000000000..4c5909c93
--- /dev/null
+++ b/tests/_python_orig/test_lexical_scope.py
@@ -0,0 +1,28 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(ti.cpu)
+def test_func_closure():
+ def my_test():
+ a = 32
+
+ @ti.func
+ def foo():
+ ti.static_assert(a == 32)
+
+ @ti.kernel
+ def func():
+ ti.static_assert(a == 32)
+ foo()
+
+ def dummy():
+ func()
+
+ func()
+ dummy()
+ return dummy, func
+
+ dummy, func = my_test()
+ func()
+ dummy()
diff --git a/tests/_python_orig/test_linalg.py b/tests/_python_orig/test_linalg.py
new file mode 100644
index 000000000..3a882ce69
--- /dev/null
+++ b/tests/_python_orig/test_linalg.py
@@ -0,0 +1,460 @@
+import math
+
+import numpy as np
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_const_init():
+ a = ti.Matrix.field(2, 3, dtype=ti.i32, shape=())
+ b = ti.Vector.field(3, dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def init():
+ a[None] = ti.Matrix([[0, 1, 2], [3, 4, 5]])
+ b[None] = ti.Vector([0, 1, 2])
+
+ init()
+
+ for i in range(2):
+ for j in range(3):
+ assert a[None][i, j] == i * 3 + j
+
+ for j in range(3):
+ assert b[None][j] == j
+
+
+@test_utils.test()
+def test_basic_utils():
+ a = ti.Vector.field(3, dtype=ti.f32)
+ b = ti.Vector.field(2, dtype=ti.f32)
+ abT = ti.Matrix.field(3, 2, dtype=ti.f32)
+ aNormalized = ti.Vector.field(3, dtype=ti.f32)
+
+ normA = ti.field(ti.f32)
+ normSqrA = ti.field(ti.f32)
+ normInvA = ti.field(ti.f32)
+
+ ti.root.place(a, b, abT, aNormalized, normA, normSqrA, normInvA)
+
+ @ti.kernel
+ def init():
+ a[None] = ti.Vector([1.0, 2.0, -3.0])
+ b[None] = ti.Vector([4.0, 5.0])
+ abT[None] = a[None].outer_product(b[None])
+
+ normA[None] = a[None].norm()
+ normSqrA[None] = a[None].norm_sqr()
+ normInvA[None] = a[None].norm_inv()
+
+ aNormalized[None] = a[None].normalized()
+
+ init()
+
+ for i in range(3):
+ for j in range(2):
+ assert abT[None][i, j] == a[None][i] * b[None][j]
+
+ sqrt14 = np.sqrt(14.0)
+ invSqrt14 = 1.0 / sqrt14
+ assert normSqrA[None] == test_utils.approx(14.0)
+ assert normInvA[None] == test_utils.approx(invSqrt14)
+ assert normA[None] == test_utils.approx(sqrt14)
+ assert aNormalized[None][0] == test_utils.approx(1.0 * invSqrt14)
+ assert aNormalized[None][1] == test_utils.approx(2.0 * invSqrt14)
+ assert aNormalized[None][2] == test_utils.approx(-3.0 * invSqrt14)
+
+
+@test_utils.test()
+def test_cross():
+ a = ti.Vector.field(3, dtype=ti.f32)
+ b = ti.Vector.field(3, dtype=ti.f32)
+ c = ti.Vector.field(3, dtype=ti.f32)
+
+ a2 = ti.Vector.field(2, dtype=ti.f32)
+ b2 = ti.Vector.field(2, dtype=ti.f32)
+ c2 = ti.field(dtype=ti.f32)
+
+ ti.root.place(a, b, c, a2, b2, c2)
+
+ @ti.kernel
+ def init():
+ a[None] = ti.Vector([1.0, 2.0, 3.0])
+ b[None] = ti.Vector([4.0, 5.0, 6.0])
+ c[None] = a[None].cross(b[None])
+
+ a2[None] = ti.Vector([1.0, 2.0])
+ b2[None] = ti.Vector([4.0, 5.0])
+ c2[None] = a2[None].cross(b2[None])
+
+ init()
+ assert c[None][0] == -3.0
+ assert c[None][1] == 6.0
+ assert c[None][2] == -3.0
+ assert c2[None] == -3.0
+
+
+@test_utils.test()
+def test_dot():
+ a = ti.Vector.field(3, dtype=ti.f32)
+ b = ti.Vector.field(3, dtype=ti.f32)
+ c = ti.field(dtype=ti.f32)
+
+ a2 = ti.Vector.field(2, dtype=ti.f32)
+ b2 = ti.Vector.field(2, dtype=ti.f32)
+ c2 = ti.field(dtype=ti.f32)
+
+ ti.root.place(a, b, c, a2, b2, c2)
+
+ @ti.kernel
+ def init():
+ a[None] = ti.Vector([1.0, 2.0, 3.0])
+ b[None] = ti.Vector([4.0, 5.0, 6.0])
+ c[None] = a[None].dot(b[None])
+
+ a2[None] = ti.Vector([1.0, 2.0])
+ b2[None] = ti.Vector([4.0, 5.0])
+ c2[None] = a2[None].dot(b2[None])
+
+ init()
+ assert c[None] == 32.0
+ assert c2[None] == 14.0
+
+
+@test_utils.test()
+def test_transpose():
+ dim = 3
+ m = ti.Matrix.field(dim, dim, ti.f32)
+
+ ti.root.place(m)
+
+ @ti.kernel
+ def transpose():
+ mat = m[None].transpose()
+ m[None] = mat
+
+ for i in range(dim):
+ for j in range(dim):
+ m[None][i, j] = i * 2 + j * 7
+
+ transpose()
+
+ for i in range(dim):
+ for j in range(dim):
+ assert m[None][j, i] == test_utils.approx(i * 2 + j * 7)
+
+
+def _test_polar_decomp(dim, dt):
+ m = ti.Matrix.field(dim, dim, dt)
+ r = ti.Matrix.field(dim, dim, dt)
+ s = ti.Matrix.field(dim, dim, dt)
+ I = ti.Matrix.field(dim, dim, dt)
+ D = ti.Matrix.field(dim, dim, dt)
+
+ ti.root.place(m, r, s, I, D)
+
+ @ti.kernel
+ def polar():
+ R, S = ti.polar_decompose(m[None], dt)
+ r[None] = R
+ s[None] = S
+ m[None] = R @ S
+ I[None] = R @ R.transpose()
+ D[None] = S - S.transpose()
+
+ def V(i, j):
+ return i * 2 + j * 7 + int(i == j) * 3
+
+ for i in range(dim):
+ for j in range(dim):
+ m[None][i, j] = V(i, j)
+
+ polar()
+
+ tol = 5e-5 if dt == ti.f32 else 1e-12
+
+ for i in range(dim):
+ for j in range(dim):
+ assert m[None][i, j] == test_utils.approx(V(i, j), abs=tol)
+ assert I[None][i, j] == test_utils.approx(int(i == j), abs=tol)
+ assert D[None][i, j] == test_utils.approx(0, abs=tol)
+
+
+def test_polar_decomp():
+ for dim in [2, 3]:
+ for dt in [ti.f32, ti.f64]:
+
+ @test_utils.test(
+ require=ti.extension.data64 if dt == ti.f64 else [],
+ default_fp=dt)
+ def wrapped():
+ _test_polar_decomp(dim, dt)
+
+ wrapped()
+
+
+@test_utils.test()
+def test_matrix():
+ x = ti.Matrix.field(2, 2, dtype=ti.i32)
+
+ ti.root.dense(ti.i, 16).place(x)
+
+ @ti.kernel
+ def inc():
+ for i in x:
+ delta = ti.Matrix([[3, 0], [0, 0]])
+ x[i][1, 1] = x[i][0, 0] + 1
+ x[i] = x[i] + delta
+ x[i] += delta
+
+ for i in range(10):
+ x[i][0, 0] = i
+
+ inc()
+
+ for i in range(10):
+ assert x[i][0, 0] == 6 + i
+ assert x[i][1, 1] == 1 + i
+
+
+@test_utils.test()
+def _test_mat_inverse_size(n):
+ m = ti.Matrix.field(n, n, dtype=ti.f32, shape=())
+ M = np.empty(shape=(n, n), dtype=np.float32)
+ for i in range(n):
+ for j in range(n):
+ M[i, j] = i * j + i * 3 + j + 1 + int(i == j) * 4
+ assert np.linalg.det(M) != 0
+
+ m.from_numpy(M)
+
+ @ti.kernel
+ def invert():
+ m[None] = m[None].inverse()
+
+ invert()
+
+ m_np = m.to_numpy(keep_dims=True)
+ np.testing.assert_almost_equal(m_np, np.linalg.inv(M))
+
+
+def test_mat_inverse():
+ for n in range(1, 5):
+ _test_mat_inverse_size(n)
+
+
+@test_utils.test()
+def test_matrix_factories():
+ a = ti.Vector.field(3, dtype=ti.i32, shape=3)
+ b = ti.Matrix.field(2, 2, dtype=ti.f32, shape=2)
+ c = ti.Matrix.field(2, 3, dtype=ti.f32, shape=2)
+
+ @ti.kernel
+ def fill():
+ b[0] = ti.Matrix.identity(ti.f32, 2)
+ b[1] = ti.Matrix.rotation2d(math.pi / 3)
+ c[0] = ti.Matrix.zero(ti.f32, 2, 3)
+ c[1] = ti.Matrix.one(ti.f32, 2, 3)
+ for i in ti.static(range(3)):
+ a[i] = ti.Vector.unit(3, i)
+
+ fill()
+
+ for i in range(3):
+ for j in range(3):
+ assert a[i][j] == int(i == j)
+
+ sqrt3o2 = math.sqrt(3) / 2
+ assert b[0].to_numpy() == test_utils.approx(np.eye(2))
+ assert b[1].to_numpy() == test_utils.approx(
+ np.array([[0.5, -sqrt3o2], [sqrt3o2, 0.5]]))
+ assert c[0].to_numpy() == test_utils.approx(np.zeros((2, 3)))
+ assert c[1].to_numpy() == test_utils.approx(np.ones((2, 3)))
+
+
+# TODO: move codes below to test_matrix.py:
+
+
+@test_utils.test()
+def test_init_matrix_from_vectors():
+ m1 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+ m2 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+ m3 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+ m4 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+
+ @ti.kernel
+ def fill():
+ for i in range(3):
+ a = ti.Vector([1.0, 4.0, 7.0])
+ b = ti.Vector([2.0, 5.0, 8.0])
+ c = ti.Vector([3.0, 6.0, 9.0])
+ m1[i] = ti.Matrix.rows([a, b, c])
+ m2[i] = ti.Matrix.cols([a, b, c])
+ m3[i] = ti.Matrix.rows([[1.0, 4.0, 7.0], [2.0, 5.0, 8.0],
+ [3.0, 6.0, 9.0]])
+ m4[i] = ti.Matrix.cols([[1.0, 4.0, 7.0], [2.0, 5.0, 8.0],
+ [3.0, 6.0, 9.0]])
+
+ fill()
+
+ for j in range(3):
+ for i in range(3):
+ assert m1[0][i, j] == int(i + 3 * j + 1)
+ assert m2[0][j, i] == int(i + 3 * j + 1)
+ assert m3[0][i, j] == int(i + 3 * j + 1)
+ assert m4[0][j, i] == int(i + 3 * j + 1)
+
+
+# TODO: Remove this once the APIs are obsolete.
+@pytest.mark.filterwarnings('ignore')
+@test_utils.test(arch=get_host_arch_list())
+def test_init_matrix_from_vectors_deprecated():
+ m1 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+ m2 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+ m3 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+ m4 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(3))
+
+ @ti.kernel
+ def fill():
+ for i in range(3):
+ a = ti.Vector([1.0, 4.0, 7.0])
+ b = ti.Vector([2.0, 5.0, 8.0])
+ c = ti.Vector([3.0, 6.0, 9.0])
+ m1[i] = ti.Matrix.rows([a, b, c])
+ m2[i] = ti.Matrix.cols([a, b, c])
+ m3[i] = ti.Matrix.rows([[1.0, 4.0, 7.0], [2.0, 5.0, 8.0],
+ [3.0, 6.0, 9.0]])
+ m4[i] = ti.Matrix.cols([[1.0, 4.0, 7.0], [2.0, 5.0, 8.0],
+ [3.0, 6.0, 9.0]])
+
+ fill()
+
+ for j in range(3):
+ for i in range(3):
+ assert m1[0][i, j] == int(i + 3 * j + 1)
+ assert m2[0][j, i] == int(i + 3 * j + 1)
+ assert m3[0][i, j] == int(i + 3 * j + 1)
+ assert m4[0][j, i] == int(i + 3 * j + 1)
+
+
+@test_utils.test()
+def test_any_all():
+ a = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ b = ti.field(dtype=ti.i32, shape=())
+ c = ti.field(dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ b[None] = any(a[None])
+ c[None] = all(a[None])
+
+ for i in range(2):
+ for j in range(2):
+ a[None][0, 0] = i
+ a[None][1, 0] = j
+ a[None][1, 1] = i
+ a[None][0, 1] = j
+
+ func()
+ if i == 1 or j == 1:
+ assert b[None] == 1
+ else:
+ assert b[None] == 0
+
+ if i == 1 and j == 1:
+ assert c[None] == 1
+ else:
+ assert c[None] == 0
+
+
+@test_utils.test()
+def test_min_max():
+ a = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ b = ti.field(dtype=ti.i32, shape=())
+ c = ti.field(dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ b[None] = a[None].max()
+ c[None] = a[None].min()
+
+ for i in range(2):
+ for j in range(2):
+ a[None][0, 0] = i
+ a[None][1, 0] = j
+ a[None][1, 1] = i
+ a[None][0, 1] = j
+
+ func()
+ assert b[None] == max(i, j)
+ assert c[None] == min(i, j)
+
+
+# must not throw any error:
+@test_utils.test()
+def test_matrix_list_assign():
+
+ m = ti.Matrix.field(2, 2, dtype=ti.i32, shape=(2, 2, 1))
+ v = ti.Vector.field(2, dtype=ti.i32, shape=(2, 2, 1))
+
+ m[1, 0, 0] = [[4, 3], [6, 7]]
+ v[1, 0, 0] = [8, 4]
+
+ assert np.allclose(m.to_numpy()[1, 0, 0, :, :], np.array([[4, 3], [6, 7]]))
+ assert np.allclose(v.to_numpy()[1, 0, 0, :], np.array([8, 4]))
+
+ @ti.kernel
+ def func():
+ m[1, 0, 0] = [[1, 2], [3, 4]]
+ v[1, 0, 0] = [5, 6]
+ m[1, 0, 0] += [[1, 2], [3, 4]]
+ v[1, 0, 0] += [5, 6]
+
+ func()
+ assert np.allclose(m.to_numpy()[1, 0, 0, :, :], np.array([[2, 4], [6, 8]]))
+ assert np.allclose(v.to_numpy()[1, 0, 0, :], np.array([10, 12]))
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_vector_xyzw_accessor():
+ u = ti.Vector.field(2, dtype=ti.i32, shape=(2, 2, 1))
+ v = ti.Vector.field(4, dtype=ti.i32, shape=(2, 2, 1))
+
+ u[1, 0, 0].y = 3
+ v[1, 0, 0].z = 0
+ v[1, 0, 0].w = 4
+
+ @ti.kernel
+ def func():
+ u[1, 0, 0].x = 8 * u[1, 0, 0].y
+ v[1, 0, 0].z = 1 - v[1, 0, 0].w
+ v[1, 0, 0].x = 6
+
+ func()
+ assert u[1, 0, 0].x == 24
+ assert u[1, 0, 0].y == 3
+ assert v[1, 0, 0].z == -3
+ assert v[1, 0, 0].w == 4
+ assert np.allclose(v.to_numpy()[1, 0, 0, :], np.array([6, 0, -3, 4]))
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_diag():
+ m1 = ti.Matrix.field(3, 3, dtype=ti.f32, shape=())
+
+ @ti.kernel
+ def fill():
+ m1[None] = ti.Matrix.diag(dim=3, val=1.4)
+
+ fill()
+
+ for i in range(3):
+ for j in range(3):
+ if i == j:
+ assert m1[None][i, j] == test_utils.approx(1.4)
+ else:
+ assert m1[None][i, j] == 0.0
diff --git a/tests/_python_orig/test_listgen.py b/tests/_python_orig/test_listgen.py
new file mode 100644
index 000000000..eb6194624
--- /dev/null
+++ b/tests/_python_orig/test_listgen.py
@@ -0,0 +1,62 @@
+from random import randrange
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_listgen():
+ x = ti.field(ti.i32)
+ n = 1024
+
+ ti.root.dense(ti.ij, 4).dense(ti.ij, 4).dense(ti.ij,
+ 4).dense(ti.ij,
+ 4).dense(ti.ij,
+ 4).place(x)
+
+ @ti.kernel
+ def fill(c: ti.i32):
+ for i, j in x:
+ x[i, j] = i * 10 + j + c
+
+ for c in range(2):
+ print('Testing c=%d' % c)
+ fill(c)
+ # read it out once to avoid launching too many operator[] kernels
+ xnp = x.to_numpy()
+ for i in range(n):
+ for j in range(n):
+ assert xnp[i, j] == i * 10 + j + c
+
+ # Randomly check 1000 items to ensure [] work as well
+ for _ in range(1000):
+ i, j = randrange(n), randrange(n)
+ assert x[i, j] == i * 10 + j + c
+
+
+@test_utils.test()
+def test_nested_3d():
+ x = ti.field(ti.i32)
+ n = 128
+
+ ti.root.dense(ti.ijk, 4).dense(ti.ijk, 4).dense(ti.ijk,
+ 4).dense(ti.ijk,
+ 2).place(x)
+
+ @ti.kernel
+ def fill():
+ for i, j, k in x:
+ x[i, j, k] = (i * n + j) * n + k
+
+ fill()
+ # read it out once to avoid launching too many operator[] kernels
+ xnp = x.to_numpy()
+ for i in range(n):
+ for j in range(n):
+ for k in range(n):
+ assert xnp[i, j, k] == (i * n + j) * n + k
+
+ # Randomly check 1000 items to ensure [] work as well
+ for _ in range(1000):
+ i, j, k = randrange(n), randrange(n), randrange(n)
+ assert x[i, j, k] == (i * n + j) * n + k
diff --git a/tests/_python_orig/test_local_atomic_opt.py b/tests/_python_orig/test_local_atomic_opt.py
new file mode 100644
index 000000000..560fb9071
--- /dev/null
+++ b/tests/_python_orig/test_local_atomic_opt.py
@@ -0,0 +1,32 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_cse():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ a += 10
+ a = a + 123
+ A[None] = a
+
+ func()
+ assert A[None] == 133
+
+
+@test_utils.test()
+def test_store_forward():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ a = 123
+ a += 10
+ A[None] = a
+
+ func()
+ assert A[None] == 133
diff --git a/tests/_python_orig/test_local_atomics.py b/tests/_python_orig/test_local_atomics.py
new file mode 100644
index 000000000..272834d97
--- /dev/null
+++ b/tests/_python_orig/test_local_atomics.py
@@ -0,0 +1,184 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_explicit_local_atomic_add():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ for i in range(10):
+ ti.atomic_add(a, i)
+ A[None] = a
+
+ func()
+ assert A[None] == 45
+
+
+@test_utils.test()
+def test_implicit_local_atomic_add():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ for i in range(10):
+ a += i
+ A[None] = a
+
+ func()
+ assert A[None] == 45
+
+
+@test_utils.test()
+def test_explicit_local_atomic_sub():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ for i in range(10):
+ ti.atomic_sub(a, i)
+ A[None] = a
+
+ func()
+ assert A[None] == -45
+
+
+@test_utils.test()
+def test_implicit_local_atomic_sub():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ for i in range(10):
+ a -= i
+ A[None] = a
+
+ func()
+ assert A[None] == -45
+
+
+@test_utils.test()
+def test_explicit_local_atomic_min():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 1000
+ for i in range(10):
+ ti.atomic_min(a, i)
+ A[None] = a
+
+ func()
+ assert A[None] == 0
+
+
+@test_utils.test()
+def test_explicit_local_atomic_max():
+ A = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def func():
+ a = -1000
+ for i in range(10):
+ ti.atomic_max(a, i)
+ A[None] = a
+
+ func()
+ assert A[None] == 9
+
+
+@test_utils.test()
+def test_explicit_local_atomic_and():
+ A = ti.field(ti.i32, shape=())
+ max_int = 2147483647
+
+ @ti.kernel
+ def func():
+ a = 1023
+ for i in range(10):
+ ti.atomic_and(a, max_int - 2**i)
+ A[None] = a
+
+ func()
+ assert A[None] == 0
+
+
+@test_utils.test()
+def test_implicit_local_atomic_and():
+ A = ti.field(ti.i32, shape=())
+ max_int = 2147483647
+
+ @ti.kernel
+ def func():
+ a = 1023
+ for i in range(10):
+ a &= max_int - 2**i
+ A[None] = a
+
+ func()
+ assert A[None] == 0
+
+
+@test_utils.test()
+def test_explicit_local_atomic_or():
+ A = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ for i in range(10):
+ ti.atomic_or(a, 2**i)
+ A[None] = a
+
+ func()
+ assert A[None] == 1023
+
+
+@test_utils.test()
+def test_implicit_local_atomic_or():
+ A = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 0
+ for i in range(10):
+ a |= 2**i
+ A[None] = a
+
+ func()
+ assert A[None] == 1023
+
+
+@test_utils.test()
+def test_explicit_local_atomic_xor():
+ A = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 1023
+ for i in range(10):
+ ti.atomic_xor(a, 2**i)
+ A[None] = a
+
+ func()
+ assert A[None] == 0
+
+
+@test_utils.test()
+def test_implicit_local_atomic_xor():
+ A = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = 1023
+ for i in range(10):
+ a ^= 2**i
+ A[None] = a
+
+ func()
+ assert A[None] == 0
diff --git a/tests/_python_orig/test_loop_grad.py b/tests/_python_orig/test_loop_grad.py
new file mode 100644
index 000000000..ab2096134
--- /dev/null
+++ b/tests/_python_orig/test_loop_grad.py
@@ -0,0 +1,65 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.vulkan])
+def test_loop_grad():
+ x = ti.field(ti.f32)
+
+ n = 16
+ m = 8
+
+ ti.root.dense(ti.ij, (n, m)).place(x)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for k in range(n):
+ for i in range(m - 1):
+ x[k, i + 1] = x[k, i] * 2
+
+ for k in range(n):
+ x[k, 0] = k
+ func()
+
+ for k in range(n):
+ x.grad[k, m - 1] = 1
+ func.grad()
+
+ for k in range(n):
+ for i in range(m):
+ assert x[k, i] == 2**i * k
+ assert x.grad[k, i] == 2**(m - 1 - i)
+
+
+@test_utils.test(exclude=[ti.vulkan])
+def test_loop_grad_complex():
+ return # This case is not supported yet
+ x = ti.field(ti.f32)
+
+ n = 16
+ m = 8
+
+ ti.root.dense(ti.ij, (n, m)).place(x)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for k in range(n):
+ t = k * k
+ tt = t * 2
+ for i in range(m - 1):
+ x[k, i + 1] = x[k, i] * 2 + tt
+
+ for k in range(n):
+ x[k, 0] = k
+ func()
+
+ for k in range(n):
+ x.grad[k, m - 1] = 1
+ func.grad()
+
+ for k in range(n):
+ for i in range(m):
+ assert x[k, i] == i**2 + 2 * k**2
+ assert x.grad[k, i] == 2**(m - 1 - i)
diff --git a/tests/_python_orig/test_loop_unique.py b/tests/_python_orig/test_loop_unique.py
new file mode 100644
index 000000000..8c439b3bd
--- /dev/null
+++ b/tests/_python_orig/test_loop_unique.py
@@ -0,0 +1,161 @@
+from taichi.lang.misc import loop_unique
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_loop_unique_simple_1d():
+ x, y = ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 16
+ ti.root.pointer(ti.i, N).place(x)
+ ti.root.pointer(ti.i, N).place(y)
+
+ @ti.kernel
+ def inc_y():
+ for i in x:
+ a = loop_unique(x[i])
+ y[a] += 1
+
+ x[1] = 2
+ x[2] = 3
+ x[7] = 5
+ y[3] = 2
+ y[4] = 3
+ inc_y()
+ expected_result = {2: 1, 3: 3, 4: 3, 5: 1}
+ for i in range(N):
+ assert y[i] == expected_result.get(i, 0)
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_loop_unique_binary_op_1d():
+ x, y = ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 16
+ ti.root.pointer(ti.i, N).place(x)
+ ti.root.pointer(ti.i, N).place(y)
+
+ @ti.kernel
+ def inc_y():
+ for i in x:
+ a = loop_unique(x[i])
+ y[a + 1] += 1
+
+ x[1] = 2
+ x[2] = 3
+ x[7] = 5
+ y[3] = 2
+ y[4] = 3
+ inc_y()
+ expected_result = {3: 3, 4: 4, 6: 1}
+ for i in range(N):
+ assert y[i] == expected_result.get(i, 0)
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_loop_unique_nested_1d():
+ x, y = ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 16
+ ti.root.pointer(ti.i, N).place(x)
+ ti.root.pointer(ti.i, N).place(y)
+
+ @ti.kernel
+ def inc_y():
+ for i in x:
+ for j in range(i):
+ a = loop_unique(x[i])
+ y[a] += 1
+
+ x[1] = 2
+ x[2] = 3
+ x[7] = 5
+ y[3] = 2
+ y[4] = 3
+ inc_y()
+ expected_result = {2: 1, 3: 4, 4: 3, 5: 7}
+ for i in range(N):
+ assert y[i] == expected_result.get(i, 0)
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_loop_unique_2d():
+ x, y, z = ti.field(ti.i32), ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 8
+ ti.root.pointer(ti.ij, N).place(x)
+ ti.root.pointer(ti.ij, N).place(y)
+ ti.root.pointer(ti.ij, N).place(z)
+
+ @ti.kernel
+ def inc_y_z():
+ for i, j in x:
+ a = loop_unique(x[i, j])
+ y[a, j] += 1
+ z[i, i] += 1 # cannot demote this
+
+ x[1, 1] = 2
+ x[1, 2] = 4
+ x[1, 3] = 5
+ x[1, 4] = 7
+ x[1, 5] = 0
+ x[1, 6] = 1
+ x[2, 5] = 3
+ x[2, 7] = 6
+ y[3, 5] = 3
+ y[6, 6] = 8
+ z[2, 2] = 5
+ inc_y_z()
+ expected_result_y = {
+ (0, 5): 1,
+ (1, 6): 1,
+ (2, 1): 1,
+ (3, 5): 4,
+ (4, 2): 1,
+ (5, 3): 1,
+ (6, 6): 8,
+ (6, 7): 1,
+ (7, 4): 1
+ }
+ expected_result_z = {(1, 1): 6, (2, 2): 7}
+ for i in range(N):
+ for j in range(N):
+ assert y[i, j] == expected_result_y.get((i, j), 0)
+ assert z[i, j] == expected_result_z.get((i, j), 0)
+
+
+@test_utils.test()
+def test_loop_unique_ndrange():
+ x, y, z = ti.field(ti.i32), ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 8
+ M = 32
+ ti.root.dense(ti.ij, N).place(x)
+ ti.root.dense(ti.i, M).place(y)
+ ti.root.dense(ti.ij, N).place(z)
+
+ a = 3
+ b = 5
+
+ @ti.kernel
+ def prepare_x():
+ for i, j in ti.ndrange(a, b):
+ x[i, j] = i * (b + 1) + j + 1
+
+ @ti.kernel
+ def inc_y_z():
+ for i, j in ti.ndrange(a, b):
+ u = loop_unique(x[i, j])
+ y[u] += i
+ z[i, j + 1] += 10 # TODO: demote this
+
+ prepare_x()
+ inc_y_z()
+ for i in range(a * (b + 1)):
+ assert y[i] == (0 if i % (b + 1) == 0 else i // (b + 1))
+
+ for i in range(a):
+ for j in range(b + 1):
+ assert z[i, j] == (0 if j == 0 else 10)
diff --git a/tests/_python_orig/test_loops.py b/tests/_python_orig/test_loops.py
new file mode 100644
index 000000000..588bf302d
--- /dev/null
+++ b/tests/_python_orig/test_loops.py
@@ -0,0 +1,174 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_loops():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ N = 512
+
+ ti.root.dense(ti.i, N).place(x)
+ ti.root.dense(ti.i, N).place(y)
+ ti.root.lazy_grad()
+
+ for i in range(N // 2, N):
+ y[i] = i - 300
+
+ @ti.kernel
+ def func():
+ for i in range(ti.static(N // 2 + 3), N):
+ x[i] = abs(y[i])
+
+ func()
+
+ for i in range(N // 2 + 3):
+ assert x[i] == 0
+
+ for i in range(N // 2 + 3, N):
+ assert x[i] == abs(y[i])
+
+
+@test_utils.test()
+def test_numpy_loops():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+
+ N = 512
+
+ ti.root.dense(ti.i, N).place(x)
+ ti.root.dense(ti.i, N).place(y)
+ ti.root.lazy_grad()
+
+ for i in range(N // 2, N):
+ y[i] = i - 300
+
+ import numpy as np
+ begin = (np.ones(1) * (N // 2 + 3)).astype(np.int32).reshape(())
+ end = (np.ones(1) * N).astype(np.int32).reshape(())
+
+ @ti.kernel
+ def func():
+ for i in range(begin, end):
+ x[i] = abs(y[i])
+
+ func()
+
+ for i in range(N // 2 + 3):
+ assert x[i] == 0
+
+ for i in range(N // 2 + 3, N):
+ assert x[i] == abs(y[i])
+
+
+@test_utils.test()
+def test_nested_loops():
+ # this may crash if any LLVM allocas are called in the loop body
+ x = ti.field(ti.i32)
+
+ n = 2048
+
+ ti.root.dense(ti.ij, n).place(x)
+
+ @ti.kernel
+ def paint():
+ for i in range(n):
+ for j in range(n):
+ x[0, 0] = i
+
+ paint()
+
+
+@test_utils.test()
+def test_zero_outer_loop():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def test():
+ for i in range(0):
+ x[None] = 1
+
+ test()
+
+ assert x[None] == 0
+
+
+@test_utils.test()
+def test_zero_inner_loop():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def test():
+ for i in range(1):
+ for j in range(0):
+ x[None] = 1
+
+ test()
+
+ assert x[None] == 0
+
+
+@test_utils.test()
+def test_dynamic_loop_range():
+ x = ti.field(ti.i32)
+ c = ti.field(ti.i32)
+ n = 2000
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(c)
+
+ @ti.kernel
+ def test():
+ for i in x:
+ x[i] = ti.atomic_add(c[None], 1)
+ for i in range(c[None], c[None] * 2):
+ x[i - n] += c[None]
+
+ test()
+ assert c[None] == n
+ assert sum(x.to_numpy()) == (n * (n - 1) // 2) + n * n
+
+
+@test_utils.test()
+def test_loop_arg_as_range():
+ # Dynamic range loops are intended to make sure global tmps work
+ x = ti.field(ti.i32)
+ n = 1000
+
+ ti.root.dense(ti.i, n).place(x)
+
+ @ti.kernel
+ def test(b: ti.i32, e: ti.i32):
+ for i in range(b, e):
+ x[i - b] = i
+
+ pairs = [
+ (0, n // 2),
+ (n // 2, n),
+ (-n // 2, -n // 3),
+ ]
+ for b, e in pairs:
+ test(b, e)
+ for i in range(b, e):
+ assert x[i - b] == i
+
+
+@test_utils.test()
+def test_assignment_in_nested_loops():
+ # https://github.com/taichi-dev/taichi/issues/1109
+ m = ti.field(ti.f32, 3)
+ x = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a = x[None]
+ for i in m:
+ b = a
+ for j in range(1):
+ b = b
+ x[None] = b
+
+ x[None] = 1
+ func()
+ assert x[None] == 1
diff --git a/tests/_python_orig/test_materialize_check.py b/tests/_python_orig/test_materialize_check.py
new file mode 100644
index 000000000..e7acc6689
--- /dev/null
+++ b/tests/_python_orig/test_materialize_check.py
@@ -0,0 +1,35 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_check_field_not_placed():
+ a = ti.field(ti.i32)
+
+ @ti.kernel
+ def foo():
+ pass
+
+ with pytest.raises(RuntimeError,
+ match=r"These field\(s\) are not placed.*"):
+ foo()
+
+
+@test_utils.test()
+def test_check_matrix_field_member_shape():
+ a = ti.Matrix.field(2, 2, ti.i32)
+ ti.root.dense(ti.i, 10).place(a.get_scalar_field(0, 0))
+ ti.root.dense(ti.i, 11).place(a.get_scalar_field(0, 1))
+ ti.root.dense(ti.i, 10).place(a.get_scalar_field(1, 0))
+ ti.root.dense(ti.i, 11).place(a.get_scalar_field(1, 1))
+
+ @ti.kernel
+ def foo():
+ pass
+
+ with pytest.raises(
+ RuntimeError,
+ match=r"Members of the following field have different shapes.*"):
+ foo()
diff --git a/tests/_python_orig/test_matrix.py b/tests/_python_orig/test_matrix.py
new file mode 100644
index 000000000..22d56d630
--- /dev/null
+++ b/tests/_python_orig/test_matrix.py
@@ -0,0 +1,526 @@
+import math
+import operator
+
+import numpy as np
+import pytest
+from taichi.lang import impl
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+operation_types = [operator.add, operator.sub, operator.matmul]
+test_matrix_arrays = [
+ np.array([[1, 2], [3, 4]]),
+ np.array([[5, 6], [7, 8]]),
+ np.array([[2, 8], [-1, 3]])
+]
+
+vector_operation_types = [operator.add, operator.sub]
+test_vector_arrays = [
+ np.array([42, 42]),
+ np.array([24, 24]),
+ np.array([83, 12])
+]
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_python_scope_vector_operations():
+ for ops in vector_operation_types:
+ a, b = test_vector_arrays[:2]
+ m1, m2 = ti.Vector(a), ti.Vector(b)
+ c = ops(m1, m2)
+ assert np.allclose(c.to_numpy(), ops(a, b))
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_python_scope_matrix_operations():
+ for ops in operation_types:
+ a, b = test_matrix_arrays[:2]
+ m1, m2 = ti.Matrix(a), ti.Matrix(b)
+ c = ops(m1, m2)
+ assert np.allclose(c.to_numpy(), ops(a, b))
+
+
+# TODO: Loops inside the function will cause AssertionError:
+# No new variables can be declared after kernel invocations
+# or Python-scope field accesses.
+# ideally we should use pytest.fixture to parameterize the tests
+# over explicit loops
+@pytest.mark.parametrize('ops', vector_operation_types)
+@test_utils.test(arch=get_host_arch_list())
+def test_python_scope_vector_field(ops):
+ t1 = ti.Vector.field(2, dtype=ti.i32, shape=())
+ t2 = ti.Vector.field(2, dtype=ti.i32, shape=())
+ a, b = test_vector_arrays[:2]
+ t1[None], t2[None] = a.tolist(), b.tolist()
+
+ c = ops(t1[None], t2[None])
+ assert np.allclose(c.to_numpy(), ops(a, b))
+
+
+@pytest.mark.parametrize('ops', vector_operation_types)
+@test_utils.test(arch=get_host_arch_list())
+def test_python_scope_matrix_field(ops):
+ t1 = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ t2 = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ a, b = test_matrix_arrays[:2]
+ # ndarray not supported here
+ t1[None], t2[None] = a.tolist(), b.tolist()
+
+ c = ops(t1[None], t2[None])
+ print(c)
+
+ assert np.allclose(c.to_numpy(), ops(a, b))
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_constant_matrices():
+ assert ti.cos(math.pi / 3) == test_utils.approx(0.5)
+ assert np.allclose((-ti.Vector([2, 3])).to_numpy(), np.array([-2, -3]))
+ assert ti.cos(ti.Vector([2, 3])).to_numpy() == test_utils.approx(
+ np.cos(np.array([2, 3])))
+ assert ti.max(2, 3) == 3
+ res = ti.max(4, ti.Vector([3, 4, 5]))
+ assert np.allclose(res.to_numpy(), np.array([4, 4, 5]))
+ res = ti.Vector([2, 3]) + ti.Vector([3, 4])
+ assert np.allclose(res.to_numpy(), np.array([5, 7]))
+ res = ti.atan2(ti.Vector([2, 3]), ti.Vector([3, 4]))
+ assert res.to_numpy() == test_utils.approx(
+ np.arctan2(np.array([2, 3]), np.array([3, 4])))
+ res = ti.Matrix([[2, 3], [4, 5]]) @ ti.Vector([2, 3])
+ assert np.allclose(res.to_numpy(), np.array([13, 23]))
+ v = ti.Vector([3, 4])
+ w = ti.Vector([5, -12])
+ r = ti.Vector([1, 2, 3, 4])
+ s = ti.Matrix([[1, 2], [3, 4]])
+ assert v.normalized().to_numpy() == test_utils.approx(np.array([0.6, 0.8]))
+ assert v.cross(w) == test_utils.approx(-12 * 3 - 4 * 5)
+ w.y = v.x * w[0]
+ r.x = r.y
+ r.y = r.z
+ r.z = r.w
+ r.w = r.x
+ assert np.allclose(w.to_numpy(), np.array([5, 15]))
+ assert ti.select(ti.Vector([1, 0]), ti.Vector([2, 3]),
+ ti.Vector([4, 5])) == ti.Vector([2, 5])
+ s[0, 1] = 2
+ assert s[0, 1] == 2
+
+ @ti.kernel
+ def func(t: ti.i32):
+ m = ti.Matrix([[2, 3], [4, t]])
+ print(m @ ti.Vector([2, 3]))
+ m += ti.Matrix([[3, 4], [5, t]])
+ print(m @ v)
+ print(r.x, r.y, r.z, r.w)
+ s = w.transpose() @ m
+ print(s)
+ print(m)
+
+ func(5)
+
+
+@pytest.mark.parametrize('ops', vector_operation_types)
+@test_utils.test(arch=get_host_arch_list())
+def test_taichi_scope_vector_operations_with_global_vectors(ops):
+ a, b, c = test_vector_arrays[:3]
+ m1, m2 = ti.Vector(a), ti.Vector(b)
+ r1 = ti.Vector.field(2, dtype=ti.i32, shape=())
+ r2 = ti.Vector.field(2, dtype=ti.i32, shape=())
+ m3 = ti.Vector.field(2, dtype=ti.i32, shape=())
+ m3.from_numpy(c)
+
+ @ti.kernel
+ def run():
+ r1[None] = ops(m1, m2)
+ r2[None] = ops(m1, m3[None])
+
+ run()
+
+ assert np.allclose(r1[None].to_numpy(), ops(a, b))
+ assert np.allclose(r2[None].to_numpy(), ops(a, c))
+
+
+@pytest.mark.parametrize('ops', vector_operation_types)
+@test_utils.test(arch=get_host_arch_list())
+def test_taichi_scope_matrix_operations_with_global_matrices(ops):
+ a, b, c = test_matrix_arrays[:3]
+ m1, m2 = ti.Matrix(a), ti.Matrix(b)
+ r1 = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ r2 = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ m3 = ti.Matrix.field(2, 2, dtype=ti.i32, shape=())
+ m3.from_numpy(c)
+
+ @ti.kernel
+ def run():
+ r1[None] = ops(m1, m2)
+ r2[None] = ops(m1, m3[None])
+
+ run()
+
+ assert np.allclose(r1[None].to_numpy(), ops(a, b))
+ assert np.allclose(r2[None].to_numpy(), ops(a, c))
+
+
+@test_utils.test()
+def test_matrix_non_constant_index_numpy():
+ @ti.kernel
+ def func1(a: ti.any_arr(element_dim=2)):
+ for i in range(5):
+ for j, k in ti.ndrange(2, 2):
+ a[i][j, k] = j * j + k * k
+
+ m = np.empty((5, 2, 2), dtype=np.int32)
+ func1(m)
+ assert m[1][0, 1] == 1
+ assert m[2][1, 0] == 1
+ assert m[3][1, 1] == 2
+ assert m[4][0, 1] == 1
+
+ @ti.kernel
+ def func2(b: ti.any_arr(element_dim=1, layout=ti.Layout.SOA)):
+ for i in range(5):
+ for j in range(4):
+ b[i][j * j] = j * j
+
+ v = np.empty((10, 5), dtype=np.int32)
+ func2(v)
+ assert v[0][1] == 0
+ assert v[1][1] == 1
+ assert v[4][1] == 4
+ assert v[9][1] == 9
+
+
+@test_utils.test(require=ti.extension.dynamic_index,
+ dynamic_index=True,
+ debug=True)
+def test_matrix_non_constant_index():
+ m = ti.Matrix.field(2, 2, ti.i32, 5)
+ v = ti.Vector.field(10, ti.i32, 5)
+
+ @ti.kernel
+ def func1():
+ for i in range(5):
+ for j, k in ti.ndrange(2, 2):
+ m[i][j, k] = j * j + k * k
+ assert m[1][0, 1] == 1
+ assert m[2][1, 0] == 1
+ assert m[3][1, 1] == 2
+
+ func1()
+ assert m[4][0, 1] == 1
+
+ @ti.kernel
+ def func2():
+ for i in range(5):
+ for j in range(4):
+ v[i][j * j] = j * j
+ assert v[1][0] == 0
+ assert v[1][1] == 1
+ assert v[1][4] == 4
+
+ func2()
+ assert v[1][9] == 9
+
+ @ti.kernel
+ def func3():
+ tmp = ti.Vector([1, 2, 3])
+ for i in range(3):
+ tmp[i] = i * i
+ vec = ti.Vector([4, 5, 6])
+ for j in range(3):
+ vec[tmp[i] % 3] += vec[j % 3]
+ assert tmp[0] == 0
+ assert tmp[1] == 1
+ assert tmp[2] == 4
+
+ func3()
+
+ @ti.kernel
+ def func4(k: ti.i32):
+ tmp = ti.Vector([k, k * 2, k * 3])
+ assert tmp[0] == k
+ assert tmp[1] == k * 2
+ assert tmp[2] == k * 3
+
+ func4(10)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_matrix_constant_index():
+ m = ti.Matrix.field(2, 2, ti.i32, 5)
+
+ @ti.kernel
+ def func():
+ for i in range(5):
+ for j, k in ti.static(ti.ndrange(2, 2)):
+ m[i][j, k] = 12
+
+ func()
+
+ assert np.allclose(m.to_numpy(), np.ones((5, 2, 2), np.int32) * 12)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_vector_to_list():
+ a = ti.Vector.field(2, float, ())
+
+ data = [2, 3]
+ b = ti.Vector(data)
+ assert list(b) == data
+ assert len(b) == len(data)
+
+ a[None] = b
+ assert all(a[None] == ti.Vector(data))
+
+
+@test_utils.test(arch=ti.cpu)
+def test_matrix_to_list():
+ a = ti.Matrix.field(2, 3, float, ())
+
+ data = [[2, 3, 4], [5, 6, 7]]
+ b = ti.Matrix(data)
+ assert list(b) == data
+ assert len(b) == len(data)
+
+ a[None] = b
+ assert all(a[None] == ti.Matrix(data))
+
+
+@test_utils.test()
+def test_matrix_needs_grad():
+ # Just make sure the usage doesn't crash, see https://github.com/taichi-dev/taichi/pull/1545
+ n = 8
+ m1 = ti.Matrix.field(2, 2, ti.f32, n, needs_grad=True)
+ m2 = ti.Matrix.field(2, 2, ti.f32, n, needs_grad=True)
+ gr = ti.Matrix.field(2, 2, ti.f32, n)
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ gr[i] = m1.grad[i] + m2.grad[i]
+
+ func()
+
+
+@test_utils.test(debug=True)
+def test_copy_python_scope_matrix_to_taichi_scope():
+ a = ti.Vector([1, 2, 3])
+
+ @ti.kernel
+ def test():
+ b = a
+ assert b[0] == 1
+ assert b[1] == 2
+ assert b[2] == 3
+ b = ti.Vector([4, 5, 6])
+ assert b[0] == 4
+ assert b[1] == 5
+ assert b[2] == 6
+
+ test()
+
+
+@test_utils.test(debug=True)
+def test_copy_matrix_field_element_to_taichi_scope():
+ a = ti.Vector.field(3, ti.i32, shape=())
+ a[None] = ti.Vector([1, 2, 3])
+
+ @ti.kernel
+ def test():
+ b = a[None]
+ assert b[0] == 1
+ assert b[1] == 2
+ assert b[2] == 3
+ b[0] = 5
+ b[1] = 9
+ b[2] = 7
+ assert b[0] == 5
+ assert b[1] == 9
+ assert b[2] == 7
+ assert a[None][0] == 1
+ assert a[None][1] == 2
+ assert a[None][2] == 3
+
+ test()
+
+
+@test_utils.test(debug=True)
+def test_copy_matrix_in_taichi_scope():
+ @ti.kernel
+ def test():
+ a = ti.Vector([1, 2, 3])
+ b = a
+ assert b[0] == 1
+ assert b[1] == 2
+ assert b[2] == 3
+ b[0] = 5
+ b[1] = 9
+ b[2] = 7
+ assert b[0] == 5
+ assert b[1] == 9
+ assert b[2] == 7
+ assert a[0] == 1
+ assert a[1] == 2
+ assert a[2] == 3
+
+ test()
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda], dynamic_index=True, debug=True)
+def test_matrix_field_dynamic_index_stride():
+ # placeholders
+ temp_a = ti.field(ti.f32)
+ temp_b = ti.field(ti.f32)
+ temp_c = ti.field(ti.f32)
+ # target
+ v = ti.Vector.field(3, ti.i32)
+ x = v.get_scalar_field(0)
+ y = v.get_scalar_field(1)
+ z = v.get_scalar_field(2)
+
+ S0 = ti.root
+ S1 = S0.pointer(ti.i, 4)
+ S2 = S1.dense(ti.i, 2)
+ S3 = S2.pointer(ti.i, 8)
+ S3.place(temp_a)
+ S4 = S2.dense(ti.i, 16)
+ S4.place(x)
+ S5 = S1.dense(ti.i, 2)
+ S6 = S5.pointer(ti.i, 8)
+ S6.place(temp_b)
+ S7 = S5.dense(ti.i, 16)
+ S7.place(y)
+ S8 = S1.dense(ti.i, 2)
+ S9 = S8.dense(ti.i, 32)
+ S9.place(temp_c)
+ S10 = S8.dense(ti.i, 16)
+ S10.place(z)
+
+ @ti.kernel
+ def check_stride():
+ for i in range(128):
+ assert ti.get_addr(y, i) - ti.get_addr(x,
+ i) == v.dynamic_index_stride
+ assert ti.get_addr(z, i) - ti.get_addr(y,
+ i) == v.dynamic_index_stride
+
+ check_stride()
+
+ @ti.kernel
+ def run():
+ for i in range(128):
+ for j in range(3):
+ v[i][j] = i * j
+
+ run()
+ for i in range(128):
+ for j in range(3):
+ assert v[i][j] == i * j
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_matrix_field_dynamic_index_different_path_length():
+ v = ti.Vector.field(2, ti.i32)
+ x = v.get_scalar_field(0)
+ y = v.get_scalar_field(1)
+
+ ti.root.dense(ti.i, 8).place(x)
+ ti.root.dense(ti.i, 2).dense(ti.i, 4).place(y)
+
+ impl.get_runtime().materialize()
+ assert v.dynamic_index_stride is None
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_matrix_field_dynamic_index_not_pure_dense():
+ v = ti.Vector.field(2, ti.i32)
+ x = v.get_scalar_field(0)
+ y = v.get_scalar_field(1)
+
+ ti.root.dense(ti.i, 2).pointer(ti.i, 4).place(x)
+ ti.root.dense(ti.i, 2).dense(ti.i, 4).place(y)
+
+ impl.get_runtime().materialize()
+ assert v.dynamic_index_stride is None
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_matrix_field_dynamic_index_different_cell_size_bytes():
+ temp = ti.field(ti.f32)
+
+ v = ti.Vector.field(2, ti.i32)
+ x = v.get_scalar_field(0)
+ y = v.get_scalar_field(1)
+
+ ti.root.dense(ti.i, 8).place(x, temp)
+ ti.root.dense(ti.i, 8).place(y)
+
+ impl.get_runtime().materialize()
+ assert v.dynamic_index_stride is None
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_matrix_field_dynamic_index_different_offset_bytes_in_parent_cell():
+ temp_a = ti.field(ti.f32)
+ temp_b = ti.field(ti.f32)
+
+ v = ti.Vector.field(2, ti.i32)
+ x = v.get_scalar_field(0)
+ y = v.get_scalar_field(1)
+
+ ti.root.dense(ti.i, 8).place(temp_a, x)
+ ti.root.dense(ti.i, 8).place(y, temp_b)
+
+ impl.get_runtime().materialize()
+ assert v.dynamic_index_stride is None
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_matrix_field_dynamic_index_different_stride():
+ temp = ti.field(ti.f32)
+
+ v = ti.Vector.field(3, ti.i32)
+ x = v.get_scalar_field(0)
+ y = v.get_scalar_field(1)
+ z = v.get_scalar_field(2)
+
+ ti.root.dense(ti.i, 8).place(x, y, temp, z)
+
+ impl.get_runtime().materialize()
+ assert v.dynamic_index_stride is None
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda], dynamic_index=True)
+def test_matrix_field_dynamic_index_multiple_materialize():
+ @ti.kernel
+ def empty():
+ pass
+
+ empty()
+
+ n = 5
+ a = ti.Vector.field(3, dtype=ti.i32, shape=n)
+
+ @ti.kernel
+ def func():
+ for i in a:
+ a[i][i % 3] = i
+
+ func()
+ for i in range(n):
+ for j in range(3):
+ assert a[i][j] == (i if j == i % 3 else 0)
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda], dynamic_index=True, debug=True)
+def test_local_vector_initialized_in_a_loop():
+ @ti.kernel
+ def foo():
+ for c in range(10):
+ p = ti.Vector([c, c * 2])
+ for i in range(2):
+ assert p[i] == c * (i + 1)
+
+ foo()
diff --git a/tests/_python_orig/test_matrix_arg.py b/tests/_python_orig/test_matrix_arg.py
new file mode 100644
index 000000000..f07722f5f
--- /dev/null
+++ b/tests/_python_orig/test_matrix_arg.py
@@ -0,0 +1,37 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_matrix_arg():
+ mat1 = ti.Matrix([[1, 2, 3], [4, 5, 6]])
+
+ @ti.kernel
+ def foo(mat: ti.types.matrix(2, 3, ti.i32)) -> ti.i32:
+ return mat[0, 0] + mat[1, 2]
+
+ assert foo(mat1) == 7
+
+ mat3 = ti.Matrix([[1, 2], [3, 4], [5, 6]])
+
+ @ti.kernel
+ def foo2(var: ti.i32, mat: ti.types.matrix(3, 2, ti.i32)) -> ti.i32:
+ for i in ti.static(range(3)):
+ for j in ti.static(range(2)):
+ mat[i, j] += var
+ return mat[2, 1]
+
+ assert foo2(3, mat3) == 9
+
+
+@test_utils.test()
+def test_vector_arg():
+ vec1 = ti.Vector([1, 2, 3])
+
+ @ti.kernel
+ def foo(vec: ti.types.vector(3, ti.i32)) -> int:
+ return vec[0] + vec[1] + vec[2]
+
+ assert foo(vec1) == 6
diff --git a/tests/_python_orig/test_matrix_different_type.py b/tests/_python_orig/test_matrix_different_type.py
new file mode 100644
index 000000000..5fd00d78e
--- /dev/null
+++ b/tests/_python_orig/test_matrix_different_type.py
@@ -0,0 +1,113 @@
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+
+# TODO: test more matrix operations
+@test_utils.test()
+def test_vector():
+ type_list = [ti.f32, ti.i32]
+
+ a = ti.Vector.field(len(type_list), dtype=type_list, shape=())
+ b = ti.Vector.field(len(type_list), dtype=type_list, shape=())
+ c = ti.Vector.field(len(type_list), dtype=type_list, shape=())
+
+ @ti.kernel
+ def init():
+ a[None] = [1.0, 3]
+ b[None] = [2.0, 4]
+ c[None] = a[None] + b[None]
+
+ def verify():
+ assert isinstance(a[None][0], float)
+ assert isinstance(a[None][1], int)
+ assert isinstance(b[None][0], float)
+ assert isinstance(b[None][1], int)
+ assert c[None][0] == 3.0
+ assert c[None][1] == 7
+
+ init()
+ verify()
+
+
+# TODO: Support different element types of Matrix on opengl
+@test_utils.test(require=ti.extension.data64, exclude=ti.opengl)
+def test_matrix():
+ type_list = [[ti.f32, ti.i32], [ti.i64, ti.f32]]
+ a = ti.Matrix.field(len(type_list),
+ len(type_list[0]),
+ dtype=type_list,
+ shape=())
+ b = ti.Matrix.field(len(type_list),
+ len(type_list[0]),
+ dtype=type_list,
+ shape=())
+ c = ti.Matrix.field(len(type_list),
+ len(type_list[0]),
+ dtype=type_list,
+ shape=())
+
+ @ti.kernel
+ def init():
+ a[None] = [[1.0, 3], [1, 3.0]]
+ b[None] = [[2.0, 4], [-2, -3.0]]
+ c[None] = a[None] + b[None]
+
+ def verify():
+ assert isinstance(a[None][0], float)
+ assert isinstance(a[None][1], int)
+ assert isinstance(b[None][0], float)
+ assert isinstance(b[None][1], int)
+ assert c[None][0, 0] == 3.0
+ assert c[None][0, 1] == 7
+ assert c[None][1, 0] == -1
+ assert c[None][1, 1] == 0.0
+
+ init()
+ verify()
+
+
+@test_utils.test(require=ti.extension.quant_basic)
+def test_custom_type():
+ cit1 = ti.types.quantized_types.quant.int(bits=10, signed=True)
+ cft1 = ti.types.quantized_types.type_factory.custom_float(cit1, scale=0.1)
+ cit2 = ti.types.quantized_types.quant.int(bits=22, signed=False)
+ cft2 = ti.types.quantized_types.type_factory.custom_float(cit2, scale=0.1)
+ type_list = [[cit1, cft2], [cft1, cit2]]
+ a = ti.Matrix.field(len(type_list), len(type_list[0]), dtype=type_list)
+ b = ti.Matrix.field(len(type_list), len(type_list[0]), dtype=type_list)
+ c = ti.Matrix.field(len(type_list), len(type_list[0]), dtype=type_list)
+ ti.root.dense(ti.i,
+ 1).bit_struct(num_bits=32).place(a.get_scalar_field(0, 0),
+ a.get_scalar_field(0, 1))
+ ti.root.dense(ti.i,
+ 1).bit_struct(num_bits=32).place(a.get_scalar_field(1, 0),
+ a.get_scalar_field(1, 1))
+ ti.root.dense(ti.i,
+ 1).bit_struct(num_bits=32).place(b.get_scalar_field(0, 0),
+ b.get_scalar_field(0, 1))
+ ti.root.dense(ti.i,
+ 1).bit_struct(num_bits=32).place(b.get_scalar_field(1, 0),
+ b.get_scalar_field(1, 1))
+ ti.root.dense(ti.i,
+ 1).bit_struct(num_bits=32).place(c.get_scalar_field(0, 0),
+ c.get_scalar_field(0, 1))
+ ti.root.dense(ti.i,
+ 1).bit_struct(num_bits=32).place(c.get_scalar_field(1, 0),
+ c.get_scalar_field(1, 1))
+
+ @ti.kernel
+ def init():
+ a[0] = [[1, 3.], [2., 1]]
+ b[0] = [[2, 4.], [-2., 1]]
+ c[0] = a[0] + b[0]
+
+ def verify():
+ assert c[0][0, 0] == approx(3, 1e-3)
+ assert c[0][0, 1] == approx(7.0, 1e-3)
+ assert c[0][1, 0] == approx(0, 1e-3)
+ assert c[0][1, 1] == approx(2, 1e-3)
+
+ init()
+ verify()
diff --git a/tests/_python_orig/test_memory.py b/tests/_python_orig/test_memory.py
new file mode 100644
index 000000000..f90c2fc30
--- /dev/null
+++ b/tests/_python_orig/test_memory.py
@@ -0,0 +1,10 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cuda)
+def test_memory_allocate():
+ HUGE_SIZE = 1024**2 * 128
+ x = ti.field(ti.i32, shape=(HUGE_SIZE, ))
+ for i in range(10):
+ x[i] = i
diff --git a/tests/_python_orig/test_mesh.py b/tests/_python_orig/test_mesh.py
new file mode 100644
index 000000000..19a1b178e
--- /dev/null
+++ b/tests/_python_orig/test_mesh.py
@@ -0,0 +1,344 @@
+import os
+
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+this_dir = os.path.dirname(os.path.abspath(__file__))
+model_file_path = os.path.join(this_dir, 'ell.json')
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_mesh_patch_idx():
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'idx': ti.i32})
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+
+ @ti.kernel
+ def foo():
+ for v in model.verts:
+ v.idx = ti.mesh_patch_idx()
+
+ foo()
+ idx = model.verts.idx.to_numpy()
+ assert idx[0] == 6
+ assert idx.sum() == 89
+
+
+def _test_mesh_for(cell_reorder=False, vert_reorder=False, extra_tests=True):
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'t': ti.i32}, reorder=vert_reorder)
+ mesh_builder.cells.place({'t': ti.i32}, reorder=cell_reorder)
+ mesh_builder.cells.link(mesh_builder.verts)
+ mesh_builder.verts.link(mesh_builder.cells)
+ mesh_builder.cells.link(mesh_builder.cells)
+ mesh_builder.verts.link(mesh_builder.verts)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+
+ @ti.kernel
+ def cell_vert():
+ for c in model.cells:
+ for j in range(c.verts.size):
+ c.t += c.verts[j].id
+
+ cell_vert()
+ total = model.cells.t.to_numpy().sum()
+ model.cells.t.fill(0)
+ assert total == 892
+
+ @ti.kernel
+ def vert_cell():
+ for v in model.verts:
+ for j in range(v.cells.size):
+ v.t += v.cells[j].id
+
+ vert_cell()
+ total = model.verts.t.to_numpy().sum()
+ model.verts.t.fill(0)
+ assert total == 1104
+
+ if not extra_tests:
+ return
+
+ @ti.kernel
+ def cell_cell():
+ for c in model.cells:
+ for j in range(c.cells.size):
+ c.t += c.cells[j].id
+
+ cell_cell()
+ total = model.cells.t.to_numpy().sum()
+ model.cells.t.fill(0)
+ assert total == 690
+
+ @ti.kernel
+ def vert_vert():
+ for v in model.verts:
+ for j in range(v.verts.size):
+ v.t += v.verts[j].id
+
+ vert_vert()
+ total = model.verts.t.to_numpy().sum()
+ model.verts.t.fill(0)
+ assert total == 1144
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_mesh_for():
+ _test_mesh_for(False, False)
+ _test_mesh_for(False, True)
+
+
+@test_utils.test(require=ti.extension.mesh,
+ optimize_mesh_reordered_mapping=False)
+def test_mesh_reordered_opt():
+ _test_mesh_for(True, True, False)
+
+
+@test_utils.test(require=ti.extension.mesh, mesh_localize_to_end_mapping=False)
+def test_mesh_localize_mapping0():
+ _test_mesh_for(False, False, False)
+ _test_mesh_for(True, True, False)
+
+
+@test_utils.test(require=ti.extension.mesh,
+ mesh_localize_from_end_mapping=True)
+def test_mesh_localize_mapping1():
+ _test_mesh_for(False, False, False)
+ _test_mesh_for(True, True, False)
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_mesh_reorder():
+ vec3i = ti.types.vector(3, ti.i32)
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'s': ti.i32, 's3': vec3i}, reorder=True)
+ mesh_builder.cells.link(mesh_builder.verts)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+
+ id2 = np.array([x**2 for x in range(len(model.verts))])
+ id123 = np.array([[x**1, x**2, x**3] for x in range(len(model.verts))])
+ model.verts.s.from_numpy(id2)
+ model.verts.s3.from_numpy(id123)
+
+ @ti.kernel
+ def foo():
+ for v in model.verts:
+ assert v.s == v.id**2
+ assert v.s3[0] == v.id**1 and v.s3[1] == v.id**2 and v.s3[
+ 2] == v.id**3
+ v.s = v.id**3
+ v.s3 *= v.id
+
+ foo()
+
+ id3 = model.verts.s.to_numpy()
+ id234 = model.verts.s3.to_numpy()
+
+ for i in range(len(model.verts)):
+ assert model.verts.s[i] == i**3
+ assert id3[i] == i**3
+ assert model.verts.s3[i][0] == i**2
+ assert model.verts.s3[i][1] == i**3
+ assert model.verts.s3[i][2] == i**4
+ assert id234[i][0] == i**2
+ assert id234[i][1] == i**3
+ assert id234[i][2] == i**4
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_mesh_minor_relations():
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'y': ti.i32})
+ mesh_builder.edges.place({'x': ti.i32})
+ mesh_builder.cells.link(mesh_builder.edges)
+ mesh_builder.verts.link(mesh_builder.cells)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+ model.edges.x.fill(1)
+
+ @ti.kernel
+ def foo():
+ for v in model.verts:
+ for i in range(v.cells.size):
+ c = v.cells[i]
+ for j in range(c.edges.size):
+ e = c.edges[j]
+ v.y += e.x
+
+ foo()
+ total = model.verts.y.to_numpy().sum()
+ assert total == 576
+
+
+@test_utils.test(require=ti.extension.mesh, demote_no_access_mesh_fors=True)
+def test_multiple_meshes():
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'y': ti.i32})
+ meta = ti.Mesh.load_meta(model_file_path)
+ model1 = mesh_builder.build(meta)
+ model2 = mesh_builder.build(meta)
+
+ model1.verts.y.from_numpy(
+ np.array([x**2 for x in range(len(model1.verts))]))
+
+ @ti.kernel
+ def foo():
+ for v in model1.verts:
+ model2.verts.y[v.id] = v.y
+
+ foo()
+ out = model2.verts.y.to_numpy()
+ for i in range(len(out)):
+ assert out[i] == i**2
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_mesh_local():
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'a': ti.i32})
+ mesh_builder.faces.link(mesh_builder.verts)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+ ext_a = ti.field(ti.i32, shape=len(model.verts))
+
+ @ti.kernel
+ def foo(cache: ti.template()):
+ if ti.static(cache):
+ ti.mesh_local(ext_a, model.verts.a)
+ for f in model.faces:
+ m = f.verts[0].id + f.verts[1].id + f.verts[2].id
+ f.verts[0].a += m
+ f.verts[1].a += m
+ f.verts[2].a += m
+ ext_a[f.verts[0].id] += m
+ ext_a[f.verts[1].id] += m
+ ext_a[f.verts[2].id] += m
+
+ foo(False)
+ res1 = model.verts.a.to_numpy()
+ res2 = ext_a.to_numpy()
+ model.verts.a.fill(0)
+ ext_a.fill(0)
+ foo(True)
+ res3 = model.verts.a.to_numpy()
+ res4 = ext_a.to_numpy()
+
+ for i in range(len(model.verts)):
+ assert res1[i] == res2[i]
+ assert res1[i] == res3[i]
+ assert res1[i] == res4[i]
+
+
+@test_utils.test(require=ti.extension.mesh, experimental_auto_mesh_local=True)
+def test_auto_mesh_local():
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.verts.place({'a': ti.i32, 's': ti.i32})
+ mesh_builder.faces.link(mesh_builder.verts)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+ ext_a = ti.field(ti.i32, shape=len(model.verts))
+
+ @ti.kernel
+ def foo(cache: ti.template()):
+ for v in model.verts:
+ v.s = v.id
+ if ti.static(cache):
+ ti.mesh_local(ext_a, model.verts.a)
+ for f in model.faces:
+ m = f.verts[0].s + f.verts[1].s + f.verts[2].s
+ f.verts[0].a += m
+ f.verts[1].a += m
+ f.verts[2].a += m
+ for i in range(3):
+ ext_a[f.verts[i].id] += m
+
+ foo(False)
+ res1 = model.verts.a.to_numpy()
+ res2 = ext_a.to_numpy()
+ model.verts.a.fill(0)
+ ext_a.fill(0)
+ foo(True)
+ res3 = model.verts.a.to_numpy()
+ res4 = ext_a.to_numpy()
+
+ for i in range(len(model.verts)):
+ assert res1[i] == res2[i]
+ assert res1[i] == res3[i]
+ assert res1[i] == res4[i]
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_nested_mesh_for():
+ mesh_builder = ti.Mesh.Tet()
+ mesh_builder.faces.place({'a': ti.i32, 'b': ti.i32})
+ mesh_builder.faces.link(mesh_builder.verts)
+ model = mesh_builder.build(ti.Mesh.load_meta(model_file_path))
+
+ @ti.kernel
+ def foo():
+ for f in model.faces:
+ for i in range(f.verts.size):
+ f.a += f.verts[i].id
+ for v in f.verts:
+ f.b += v.id
+
+ a = model.faces.a.to_numpy()
+ b = model.faces.b.to_numpy()
+ assert (a == b).all() == 1
+
+
+@test_utils.test(require=ti.extension.mesh)
+def test_multiple_mesh_major_relations():
+ mesh = ti.TetMesh()
+ mesh.verts.place({
+ 's': ti.i32,
+ 's_': ti.i32,
+ 's1': ti.i32,
+ 'a': ti.i32,
+ 'b': ti.i32,
+ 'c': ti.i32
+ })
+ mesh.edges.place({'s2': ti.i32})
+ mesh.cells.place({'s3': ti.i32})
+ mesh.verts.link(mesh.verts)
+ mesh.verts.link(mesh.edges)
+ mesh.verts.link(mesh.cells)
+
+ model = mesh.build(ti.Mesh.load_meta(model_file_path))
+
+ @ti.kernel
+ def foo():
+ for u in model.verts:
+ u.s1 = u.id
+ for e in model.edges:
+ e.s2 = e.id
+ for c in model.cells:
+ c.s3 = c.id
+
+ ti.mesh_local(model.verts.s1, model.edges.s2, model.cells.s3)
+ for u in model.verts:
+ a, b, c = 0, 0, 0
+ for i in range(u.verts.size):
+ a += u.verts[i].s1
+ for i in range(u.edges.size):
+ b += u.edges[i].s2
+ for i in range(u.cells.size):
+ c += u.cells[i].s3
+ u.s = a * b * c
+
+ for u in model.verts:
+ for i in range(u.verts.size):
+ u.a += u.verts[i].s1
+ for u in model.verts:
+ for i in range(u.edges.size):
+ u.b += u.edges[i].s2
+ for u in model.verts:
+ for i in range(u.cells.size):
+ u.c += u.cells[i].s3
+ for u in model.verts:
+ u.s_ = u.a * u.b * u.c
+
+ foo()
+
+ sum1 = model.verts.s.to_numpy().sum()
+ sum2 = model.verts.s_.to_numpy().sum()
+ assert sum1 == sum2
diff --git a/tests/_python_orig/test_mod.py b/tests/_python_orig/test_mod.py
new file mode 100644
index 000000000..987b1d7c4
--- /dev/null
+++ b/tests/_python_orig/test_mod.py
@@ -0,0 +1,68 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def _test_py_style_mod(arg1, a, arg2, b, arg3, c):
+ z = ti.field(arg3, shape=())
+
+ @ti.kernel
+ def func(x: arg1, y: arg2):
+ z[None] = x % y
+
+ func(a, b)
+ assert z[None] == c
+
+
+@test_utils.test()
+def _test_c_style_mod(arg1, a, arg2, b, arg3, c):
+ z = ti.field(arg3, shape=())
+
+ @ti.kernel
+ def func(x: arg1, y: arg2):
+ z[None] = ti.raw_mod(x, y)
+
+ func(a, b)
+ assert z[None] == c
+
+
+def test_py_style_mod():
+ def func(a, b):
+ _test_py_style_mod(ti.i32, a, ti.i32, b, ti.i32, a % b)
+
+ func(10, 3)
+ func(-10, 3)
+ func(10, -3)
+ func(-10, -3)
+
+
+def _c_mod(a, b):
+ return a - b * int(float(a) / b)
+
+
+def test_c_style_mod():
+ def func(a, b):
+ _test_c_style_mod(ti.i32, a, ti.i32, b, ti.i32, _c_mod(a, b))
+
+ func(10, 3)
+ func(-10, 3)
+ func(10, -3)
+ func(-10, -3)
+
+
+@test_utils.test()
+def test_mod_scan():
+ z = ti.field(ti.i32, shape=())
+ w = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func(x: ti.i32, y: ti.i32):
+ z[None] = x % y
+ w[None] = ti.raw_mod(x, y)
+
+ for i in range(-10, 11):
+ for j in range(-10, 11):
+ if j != 0:
+ func(i, j)
+ assert z[None] == i % j
+ assert w[None] == _c_mod(i, j)
diff --git a/tests/_python_orig/test_module_import.py b/tests/_python_orig/test_module_import.py
new file mode 100644
index 000000000..35f4ff4a9
--- /dev/null
+++ b/tests/_python_orig/test_module_import.py
@@ -0,0 +1,12 @@
+import taichi as myowntaichi
+from tests import test_utils
+
+
+@test_utils.test()
+def test_module_import():
+ @myowntaichi.kernel
+ def func():
+ for _ in myowntaichi.static(range(8)):
+ pass
+
+ func()
diff --git a/tests/_python_orig/test_mpm88.py b/tests/_python_orig/test_mpm88.py
new file mode 100644
index 000000000..c52d30acc
--- /dev/null
+++ b/tests/_python_orig/test_mpm88.py
@@ -0,0 +1,241 @@
+import os
+
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+def run_mpm88_test():
+ dim = 2
+ N = 64
+ n_particles = N * N
+ n_grid = 128
+ dx = 1 / n_grid
+ inv_dx = 1 / dx
+ dt = 2.0e-4
+ p_vol = (dx * 0.5)**2
+ p_rho = 1
+ p_mass = p_vol * p_rho
+ E = 400
+
+ x = ti.Vector.field(dim, dtype=ti.f32, shape=n_particles)
+ v = ti.Vector.field(dim, dtype=ti.f32, shape=n_particles)
+ C = ti.Matrix.field(dim, dim, dtype=ti.f32, shape=n_particles)
+ J = ti.field(dtype=ti.f32, shape=n_particles)
+ grid_v = ti.Vector.field(dim, dtype=ti.f32, shape=(n_grid, n_grid))
+ grid_m = ti.field(dtype=ti.f32, shape=(n_grid, n_grid))
+
+ @ti.kernel
+ def substep():
+ for p in x:
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * p_vol * (J[p] - 1) * 4 * inv_dx * inv_dx * E
+ affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
+ for i in ti.static(range(3)):
+ for j in ti.static(range(3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset.cast(float) - fx) * dx
+ weight = w[i][0] * w[j][1]
+ ti.atomic_add(grid_v[base + offset],
+ weight * (p_mass * v[p] + affine @ dpos))
+ ti.atomic_add(grid_m[base + offset], weight * p_mass)
+
+ for i, j in grid_m:
+ if grid_m[i, j] > 0:
+ bound = 3
+ inv_m = 1 / grid_m[i, j]
+ grid_v[i, j] = inv_m * grid_v[i, j]
+ grid_v[i, j][1] -= dt * 9.8
+ if i < bound and grid_v[i, j][0] < 0:
+ grid_v[i, j][0] = 0
+ if i > n_grid - bound and grid_v[i, j][0] > 0:
+ grid_v[i, j][0] = 0
+ if j < bound and grid_v[i, j][1] < 0:
+ grid_v[i, j][1] = 0
+ if j > n_grid - bound and grid_v[i, j][1] > 0:
+ grid_v[i, j][1] = 0
+
+ for p in x:
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [
+ 0.5 * (1.5 - fx)**2, 0.75 - (fx - 1.0)**2, 0.5 * (fx - 0.5)**2
+ ]
+ new_v = ti.Vector.zero(ti.f32, 2)
+ new_C = ti.Matrix.zero(ti.f32, 2, 2)
+ for i in ti.static(range(3)):
+ for j in ti.static(range(3)):
+ dpos = ti.Vector([i, j]).cast(float) - fx
+ g_v = grid_v[base + ti.Vector([i, j])]
+ weight = w[i][0] * w[j][1]
+ new_v += weight * g_v
+ new_C += 4 * weight * g_v.outer_product(dpos) * inv_dx
+ v[p] = new_v
+ x[p] += dt * v[p]
+ J[p] *= 1 + dt * new_C.trace()
+ C[p] = new_C
+
+ for i in range(n_particles):
+ x[i] = [i % N / N * 0.4 + 0.2, i / N / N * 0.4 + 0.05]
+ v[i] = [0, -3]
+ J[i] = 1
+
+ for frame in range(10):
+ for s in range(50):
+ grid_v.fill([0, 0])
+ grid_m.fill(0)
+ substep()
+
+ pos = x.to_numpy()
+ pos[:, 1] *= 2
+ regression = [
+ 0.31722742,
+ 0.15826741,
+ 0.10224003,
+ 0.07810827,
+ ]
+ for i in range(4):
+ assert (pos**(i + 1)).mean() == test_utils.approx(regression[i],
+ rel=1e-2)
+
+
+@test_utils.test()
+def test_mpm88():
+ run_mpm88_test()
+
+
+def _is_appveyor():
+ # AppVeyor adds `APPVEYOR=True` ('true' on Ubuntu)
+ # https://www.appveyor.com/docs/environment-variables/
+ return os.getenv('APPVEYOR', '').lower() == 'true'
+
+
+#TODO: Remove exclude of ti.metal
+@pytest.mark.skipif(_is_appveyor(), reason='Stuck on Appveyor.')
+@test_utils.test(require=ti.extension.async_mode,
+ exclude=[ti.metal],
+ async_mode=True)
+def test_mpm88_async():
+ # It seems that all async tests on Appveyor run super slow. For example,
+ # on Appveyor, 10+ tests have passed during the execution of
+ # test_fuse_dense_x2y2z. Maybe thread synchronizations are expensive?
+ run_mpm88_test()
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.opengl])
+def test_mpm88_numpy_and_ndarray():
+ import numpy as np
+
+ dim = 2
+ N = 64
+ n_particles = N * N
+ n_grid = 128
+ dx = 1 / n_grid
+ inv_dx = 1 / dx
+ dt = 2.0e-4
+ p_vol = (dx * 0.5)**2
+ p_rho = 1
+ p_mass = p_vol * p_rho
+ E = 400
+
+ @ti.kernel
+ def substep(x: ti.any_arr(element_dim=1), v: ti.any_arr(element_dim=1),
+ C: ti.any_arr(element_dim=2), J: ti.any_arr(),
+ grid_v: ti.any_arr(element_dim=1), grid_m: ti.any_arr()):
+ for p in x:
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+ stress = -dt * p_vol * (J[p] - 1) * 4 * inv_dx * inv_dx * E
+ affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
+ for i in ti.static(range(3)):
+ for j in ti.static(range(3)):
+ offset = ti.Vector([i, j])
+ dpos = (offset.cast(float) - fx) * dx
+ weight = w[i][0] * w[j][1]
+ ti.atomic_add(grid_v[base + offset],
+ weight * (p_mass * v[p] + affine @ dpos))
+ ti.atomic_add(grid_m[base + offset], weight * p_mass)
+
+ for i, j in grid_m:
+ if grid_m[i, j] > 0:
+ bound = 3
+ inv_m = 1 / grid_m[i, j]
+ grid_v[i, j] = inv_m * grid_v[i, j]
+ grid_v[i, j][1] -= dt * 9.8
+ if i < bound and grid_v[i, j][0] < 0:
+ grid_v[i, j][0] = 0
+ if i > n_grid - bound and grid_v[i, j][0] > 0:
+ grid_v[i, j][0] = 0
+ if j < bound and grid_v[i, j][1] < 0:
+ grid_v[i, j][1] = 0
+ if j > n_grid - bound and grid_v[i, j][1] > 0:
+ grid_v[i, j][1] = 0
+
+ for p in x:
+ base = (x[p] * inv_dx - 0.5).cast(int)
+ fx = x[p] * inv_dx - base.cast(float)
+ w = [
+ 0.5 * (1.5 - fx)**2, 0.75 - (fx - 1.0)**2, 0.5 * (fx - 0.5)**2
+ ]
+ new_v = ti.Vector.zero(ti.f32, 2)
+ new_C = ti.Matrix.zero(ti.f32, 2, 2)
+ for i in ti.static(range(3)):
+ for j in ti.static(range(3)):
+ dpos = ti.Vector([i, j]).cast(float) - fx
+ g_v = grid_v[base + ti.Vector([i, j])]
+ weight = w[i][0] * w[j][1]
+ new_v += weight * g_v
+ new_C += 4 * weight * g_v.outer_product(dpos) * inv_dx
+ v[p] = new_v
+ x[p] += dt * v[p]
+ J[p] *= 1 + dt * new_C.trace()
+ C[p] = new_C
+
+ def run_test(x, v, C, J, grid_v, grid_m):
+ for i in range(n_particles):
+ x[i] = [i % N / N * 0.4 + 0.2, i / N / N * 0.4 + 0.05]
+ v[i] = [0, -3]
+ J[i] = 1
+
+ for frame in range(10):
+ for s in range(50):
+ grid_v.fill(0)
+ grid_m.fill(0)
+ substep(x, v, C, J, grid_v, grid_m)
+
+ pos = x if isinstance(x, np.ndarray) else x.to_numpy()
+ pos[:, 1] *= 2
+ regression = [
+ 0.31722742,
+ 0.15826741,
+ 0.10224003,
+ 0.07810827,
+ ]
+ for i in range(4):
+ assert (pos**(i + 1)).mean() == test_utils.approx(regression[i],
+ rel=1e-2)
+
+ def test_numpy():
+ x = np.zeros((n_particles, dim), dtype=np.float32)
+ v = np.zeros((n_particles, dim), dtype=np.float32)
+ C = np.zeros((n_particles, dim, dim), dtype=np.float32)
+ J = np.zeros(n_particles, dtype=np.float32)
+ grid_v = np.zeros((n_grid, n_grid, dim), dtype=np.float32)
+ grid_m = np.zeros((n_grid, n_grid), dtype=np.float32)
+ run_test(x, v, C, J, grid_v, grid_m)
+
+ def test_ndarray():
+ x = ti.Vector.ndarray(dim, ti.f32, n_particles)
+ v = ti.Vector.ndarray(dim, ti.f32, n_particles)
+ C = ti.Matrix.ndarray(dim, dim, ti.f32, n_particles)
+ J = ti.ndarray(ti.f32, n_particles)
+ grid_v = ti.Vector.ndarray(dim, ti.f32, (n_grid, n_grid))
+ grid_m = ti.ndarray(ti.f32, (n_grid, n_grid))
+ run_test(x, v, C, J, grid_v, grid_m)
+
+ test_numpy()
+ test_ndarray()
diff --git a/tests/_python_orig/test_mpm_particle_list.py b/tests/_python_orig/test_mpm_particle_list.py
new file mode 100644
index 000000000..372471367
--- /dev/null
+++ b/tests/_python_orig/test_mpm_particle_list.py
@@ -0,0 +1,63 @@
+import random
+
+import taichi as ti
+from tests import test_utils
+
+
+@ti.data_oriented
+class MPMSolver:
+ def __init__(self, res):
+ dim = len(res)
+ self.dx = 1 / res[0]
+ self.inv_dx = 1.0 / self.dx
+ self.pid = ti.field(ti.i32)
+ self.x = ti.Vector.field(dim, dtype=ti.f32)
+ self.grid_m = ti.field(dtype=ti.f32)
+
+ indices = ti.ij
+
+ self.grid = ti.root.pointer(indices, 32)
+ block = self.grid.pointer(indices, 16)
+ voxel = block.dense(indices, 8)
+
+ voxel.place(self.grid_m)
+ block.dynamic(ti.axes(dim), 1024 * 1024,
+ chunk_size=4096).place(self.pid)
+
+ ti.root.dynamic(ti.i, 2**25, 2**20).place(self.x)
+ self.substeps = 0
+
+ for i in range(10000):
+ self.x[i] = [random.random() * 0.5, random.random() * 0.5]
+
+ @ti.kernel
+ def build_pid(self):
+ ti.block_dim(256)
+ for p in self.x:
+ base = ti.floor(self.x[p] * self.inv_dx - 0.5).cast(int) + 1
+ ti.append(self.pid.parent(), base, p)
+
+ def step(self):
+ for i in range(1000):
+ self.substeps += 1
+ self.grid.deactivate_all()
+ self.build_pid()
+
+
+@test_utils.test(require=ti.extension.sparse,
+ exclude=[ti.metal],
+ device_memory_GB=1.0)
+def test_mpm_particle_list_no_leakage():
+ # By default Taichi will allocate 0.5 GB for testing.
+ mpm = MPMSolver(res=(128, 128))
+ mpm.step()
+
+
+@test_utils.test(require=[ti.extension.sparse, ti.extension.packed],
+ exclude=[ti.metal],
+ device_memory_GB=1.0,
+ packed=True)
+def test_mpm_particle_list_no_leakage_packed():
+ # By default Taichi will allocate 0.5 GB for testing.
+ mpm = MPMSolver(res=(128, 128))
+ mpm.step()
diff --git a/tests/_python_orig/test_name_error.py b/tests/_python_orig/test_name_error.py
new file mode 100644
index 000000000..e604c436d
--- /dev/null
+++ b/tests/_python_orig/test_name_error.py
@@ -0,0 +1,15 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_name_error():
+ with pytest.raises(ti.TaichiNameError, match='Name "a" is not defined'):
+
+ @ti.kernel
+ def foo():
+ a + 1
+
+ foo()
diff --git a/tests/_python_orig/test_native_functions.py b/tests/_python_orig/test_native_functions.py
new file mode 100644
index 000000000..a0d537d01
--- /dev/null
+++ b/tests/_python_orig/test_native_functions.py
@@ -0,0 +1,81 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_abs():
+ x = ti.field(ti.f32)
+
+ N = 16
+
+ ti.root.dense(ti.i, N).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ x[i] = abs(-i)
+ print(x[i])
+ ti.static_print(x[i])
+
+ func()
+
+ for i in range(N):
+ assert x[i] == i
+
+
+@test_utils.test()
+def test_int():
+ x = ti.field(ti.f32)
+
+ N = 16
+
+ ti.root.dense(ti.i, N).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ x[i] = int(x[i])
+ x[i] = float(int(x[i]) // 2)
+
+ for i in range(N):
+ x[i] = i + 0.4
+
+ func()
+
+ for i in range(N):
+ assert x[i] == i // 2
+
+
+@test_utils.test()
+def test_minmax():
+ x = ti.field(ti.f32)
+ y = ti.field(ti.f32)
+ z = ti.field(ti.f32)
+ minimum = ti.field(ti.f32)
+ maximum = ti.field(ti.f32)
+
+ N = 16
+
+ ti.root.dense(ti.i, N).place(x, y, z, minimum, maximum)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ minimum[i] = min(x[i], y[i], z[i])
+ maximum[i] = max(x[i], y[i], z[i])
+
+ for i in range(N):
+ x[i] = i
+ y[i] = N - i
+ z[i] = i - 2 if i % 2 else i + 2
+
+ func()
+
+ assert np.allclose(
+ minimum.to_numpy(),
+ np.minimum(np.minimum(x.to_numpy(), y.to_numpy()), z.to_numpy()))
+ assert np.allclose(
+ maximum.to_numpy(),
+ np.maximum(np.maximum(x.to_numpy(), y.to_numpy()), z.to_numpy()))
diff --git a/tests/_python_orig/test_ndarray.py b/tests/_python_orig/test_ndarray.py
new file mode 100644
index 000000000..4cb6ebd70
--- /dev/null
+++ b/tests/_python_orig/test_ndarray.py
@@ -0,0 +1,579 @@
+import copy
+
+import numpy as np
+import pytest
+from taichi.lang import impl
+from taichi.lang.misc import get_host_arch_list
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+if has_pytorch():
+ import torch
+
+# properties
+
+data_types = [ti.i32, ti.f32, ti.i64, ti.f64]
+ndarray_shapes = [(), 8, (6, 12)]
+vector_dims = [3]
+matrix_dims = [(1, 2), (2, 3)]
+supported_archs_taichi_ndarray = [ti.cpu, ti.cuda, ti.opengl, ti.vulkan]
+
+
+def _test_scalar_ndarray(dtype, shape):
+ x = ti.ndarray(dtype, shape)
+
+ if isinstance(shape, tuple):
+ assert x.shape == shape
+ else:
+ assert x.shape == (shape, )
+ assert x.element_shape == ()
+
+ assert x.dtype == dtype
+
+
+@pytest.mark.parametrize('dtype', data_types)
+@pytest.mark.parametrize('shape', ndarray_shapes)
+@test_utils.test(arch=get_host_arch_list())
+def test_scalar_ndarray(dtype, shape):
+ _test_scalar_ndarray(dtype, shape)
+
+
+def _test_vector_ndarray(n, dtype, shape):
+ x = ti.Vector.ndarray(n, dtype, shape)
+
+ if isinstance(shape, tuple):
+ assert x.shape == shape
+ else:
+ assert x.shape == (shape, )
+ assert x.element_shape == (n, )
+
+ assert x.dtype == dtype
+ assert x.n == n
+
+
+@pytest.mark.parametrize('n', vector_dims)
+@pytest.mark.parametrize('dtype', data_types)
+@pytest.mark.parametrize('shape', ndarray_shapes)
+@test_utils.test(arch=get_host_arch_list())
+def test_vector_ndarray(n, dtype, shape):
+ _test_vector_ndarray(n, dtype, shape)
+
+
+def _test_matrix_ndarray(n, m, dtype, shape):
+ x = ti.Matrix.ndarray(n, m, dtype, shape)
+
+ if isinstance(shape, tuple):
+ assert x.shape == shape
+ else:
+ assert x.shape == (shape, )
+ assert x.element_shape == (n, m)
+
+ assert x.dtype == dtype
+ assert x.n == n
+ assert x.m == m
+
+
+@pytest.mark.parametrize('n,m', matrix_dims)
+@pytest.mark.parametrize('dtype', data_types)
+@pytest.mark.parametrize('shape', ndarray_shapes)
+@test_utils.test(arch=get_host_arch_list())
+def test_matrix_ndarray(n, m, dtype, shape):
+ _test_matrix_ndarray(n, m, dtype, shape)
+
+
+@pytest.mark.parametrize('dtype', [ti.f32, ti.f64])
+def test_default_fp_ndarray(dtype):
+ ti.init(arch=supported_archs_taichi_ndarray, default_fp=dtype)
+
+ x = ti.Vector.ndarray(2, float, ())
+
+ assert x.dtype == impl.get_runtime().default_fp
+
+
+@pytest.mark.parametrize('dtype', [ti.i32, ti.i64])
+def test_default_ip_ndarray(dtype):
+ ti.init(arch=supported_archs_taichi_ndarray, default_ip=dtype)
+
+ x = ti.Vector.ndarray(2, int, ())
+
+ assert x.dtype == impl.get_runtime().default_ip
+
+
+# access
+
+layouts = [ti.Layout.SOA, ti.Layout.AOS]
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_1d():
+ n = 4
+
+ @ti.kernel
+ def run(x: ti.any_arr(), y: ti.any_arr()):
+ for i in range(n):
+ x[i] += i + y[i]
+
+ a = ti.ndarray(ti.i32, shape=(n, ))
+ for i in range(n):
+ a[i] = i * i
+ b = np.ones((n, ), dtype=np.int32)
+ run(a, b)
+ for i in range(n):
+ assert a[i] == i * i + i + 1
+ run(b, a)
+ for i in range(n):
+ assert b[i] == i * i + (i + 1) * 2
+
+
+def _test_ndarray_2d():
+ n = 4
+ m = 7
+
+ @ti.kernel
+ def run(x: ti.any_arr(), y: ti.any_arr()):
+ for i in range(n):
+ for j in range(m):
+ x[i, j] += i + j + y[i, j]
+
+ a = ti.ndarray(ti.i32, shape=(n, m))
+ for i in range(n):
+ for j in range(m):
+ a[i, j] = i * j
+ b = np.ones((n, m), dtype=np.int32)
+ run(a, b)
+ for i in range(n):
+ for j in range(m):
+ assert a[i, j] == i * j + i + j + 1
+ run(b, a)
+ for i in range(n):
+ for j in range(m):
+ assert b[i, j] == i * j + (i + j + 1) * 2
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_2d():
+ _test_ndarray_2d()
+
+
+def _test_ndarray_copy_from_ndarray():
+ n = 16
+ a = ti.ndarray(ti.i32, shape=n)
+ b = ti.ndarray(ti.i32, shape=n)
+ a[0] = 1
+ a[4] = 2
+ b[0] = 4
+ b[4] = 5
+
+ a.copy_from(b)
+
+ assert a[0] == 4
+ assert a[4] == 5
+
+ x = ti.Vector.ndarray(10, ti.i32, 5, layout=ti.Layout.SOA)
+ y = ti.Vector.ndarray(10, ti.i32, 5, layout=ti.Layout.SOA)
+ x[1][0] = 1
+ x[2][4] = 2
+ y[1][0] = 4
+ y[2][4] = 5
+
+ x.copy_from(y)
+
+ assert x[1][0] == 4
+ assert x[2][4] == 5
+
+ x = ti.Matrix.ndarray(2, 2, ti.i32, 5, layout=ti.Layout.AOS)
+ y = ti.Matrix.ndarray(2, 2, ti.i32, 5, layout=ti.Layout.AOS)
+ x[0][0, 0] = 1
+ x[4][1, 0] = 3
+ y[0][0, 0] = 4
+ y[4][1, 0] = 6
+
+ x.copy_from(y)
+
+ assert x[0][0, 0] == 4
+ assert x[4][1, 0] == 6
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_copy_from_ndarray():
+ _test_ndarray_copy_from_ndarray()
+
+
+def _test_ndarray_deepcopy():
+ n = 16
+ x = ti.ndarray(ti.i32, shape=n)
+ x[0] = 1
+ x[4] = 2
+
+ y = copy.deepcopy(x)
+
+ assert y.shape == x.shape
+ assert y.dtype == x.dtype
+ assert y[0] == 1
+ assert y[4] == 2
+ x[0] = 4
+ x[4] = 5
+ assert y[0] == 1
+ assert y[4] == 2
+
+ x = ti.Vector.ndarray(10, ti.i32, 5, layout=ti.Layout.SOA)
+ x[1][0] = 4
+ x[2][4] = 5
+
+ y = copy.deepcopy(x)
+
+ assert y.shape == x.shape
+ assert y.dtype == x.dtype
+ assert y.n == x.n
+ assert y.layout == x.layout
+ assert y[1][0] == 4
+ assert y[2][4] == 5
+ x[1][0] = 1
+ x[2][4] = 2
+ assert y[1][0] == 4
+ assert y[2][4] == 5
+
+ x = ti.Matrix.ndarray(2, 2, ti.i32, 5, layout=ti.Layout.AOS)
+ x[0][0, 0] = 7
+ x[4][1, 0] = 9
+
+ y = copy.deepcopy(x)
+
+ assert y.shape == x.shape
+ assert y.dtype == x.dtype
+ assert y.m == x.m
+ assert y.n == x.n
+ assert y.layout == x.layout
+ assert y[0][0, 0] == 7
+ assert y[4][1, 0] == 9
+ x[0][0, 0] = 3
+ x[4][1, 0] = 5
+ assert y[0][0, 0] == 7
+ assert y[4][1, 0] == 9
+
+
+def test_ndarray_cuda_caching_allocator():
+ ti.init(arch=ti.cuda, ndarray_use_cached_allocator=True)
+ n = 8
+ a = ti.ndarray(ti.i32, shape=(n))
+ a.fill(2)
+ a = 1
+ b = ti.ndarray(ti.i32, shape=(n))
+ b.fill(2)
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_fill():
+ n = 8
+ a = ti.ndarray(ti.i32, shape=(n))
+ anp = np.ones((n, ), dtype=np.int32)
+ a.fill(2)
+ anp.fill(2)
+ assert (a.to_numpy() == anp).all()
+
+ b = ti.Vector.ndarray(4, ti.f32, shape=(n))
+ bnp = np.ones(shape=b.arr.shape, dtype=np.float32)
+ b.fill(2.5)
+ bnp.fill(2.5)
+ assert (b.to_numpy() == bnp).all()
+
+ c = ti.Matrix.ndarray(4, 4, ti.f32, shape=(n))
+ cnp = np.ones(shape=c.arr.shape, dtype=np.float32)
+ c.fill(1.5)
+ cnp.fill(1.5)
+ assert (c.to_numpy() == cnp).all()
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_rw_cache():
+ a = ti.Vector.ndarray(3, ti.f32, ())
+ b = ti.Vector.ndarray(3, ti.f32, 12)
+
+ n = 1000
+ for i in range(n):
+ c_a = copy.deepcopy(a)
+ c_b = copy.deepcopy(b)
+ c_a[None] = c_b[10]
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_deepcopy():
+ _test_ndarray_deepcopy()
+
+
+def _test_ndarray_numpy_io():
+ n = 7
+ m = 4
+ a = ti.ndarray(ti.i32, shape=(n, m))
+ a.fill(2)
+ b = ti.ndarray(ti.i32, shape=(n, m))
+ b.from_numpy(np.ones((n, m), dtype=np.int32) * 2)
+ assert (a.to_numpy() == b.to_numpy()).all()
+
+ d = 2
+ p = 4
+ x = ti.Vector.ndarray(d, ti.f32, p)
+ x.fill(2)
+ y = ti.Vector.ndarray(d, ti.f32, p)
+ y.from_numpy(np.ones((p, d), dtype=np.int32) * 2)
+ assert (x.to_numpy() == y.to_numpy()).all()
+
+ c = 2
+ d = 2
+ p = 4
+ x = ti.Matrix.ndarray(c, d, ti.f32, p)
+ x.fill(2)
+ y = ti.Matrix.ndarray(c, d, ti.f32, p)
+ y.from_numpy(np.ones((p, c, d), dtype=np.int32) * 2)
+ assert (x.to_numpy() == y.to_numpy()).all()
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_ndarray_numpy_io():
+ _test_ndarray_numpy_io()
+
+
+def _test_matrix_ndarray_python_scope(layout):
+ a = ti.Matrix.ndarray(2, 2, ti.i32, 5, layout=layout)
+ for i in range(5):
+ for j, k in ti.ndrange(2, 2):
+ a[i][j, k] = j * j + k * k
+ assert a[0][0, 0] == 0
+ assert a[1][0, 1] == 1
+ assert a[2][1, 0] == 1
+ assert a[3][1, 1] == 2
+ assert a[4][0, 1] == 1
+
+
+@pytest.mark.parametrize('layout', layouts)
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_matrix_ndarray_python_scope(layout):
+ _test_matrix_ndarray_python_scope(layout)
+
+
+def _test_matrix_ndarray_taichi_scope(layout):
+ @ti.kernel
+ def func(a: ti.any_arr()):
+ for i in range(5):
+ for j, k in ti.ndrange(2, 2):
+ a[i][j, k] = j * j + k * k
+
+ m = ti.Matrix.ndarray(2, 2, ti.i32, 5, layout=layout)
+ func(m)
+ assert m[0][0, 0] == 0
+ assert m[1][0, 1] == 1
+ assert m[2][1, 0] == 1
+ assert m[3][1, 1] == 2
+ assert m[4][0, 1] == 1
+
+
+@pytest.mark.parametrize('layout', layouts)
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_matrix_ndarray_taichi_scope(layout):
+ _test_matrix_ndarray_taichi_scope(layout)
+
+
+def _test_matrix_ndarray_taichi_scope_struct_for(layout):
+ @ti.kernel
+ def func(a: ti.any_arr()):
+ for i in a:
+ for j, k in ti.ndrange(2, 2):
+ a[i][j, k] = j * j + k * k
+
+ m = ti.Matrix.ndarray(2, 2, ti.i32, 5, layout=layout)
+ func(m)
+ assert m[0][0, 0] == 0
+ assert m[1][0, 1] == 1
+ assert m[2][1, 0] == 1
+ assert m[3][1, 1] == 2
+ assert m[4][0, 1] == 1
+
+
+@pytest.mark.parametrize('layout', layouts)
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_matrix_ndarray_taichi_scope_struct_for(layout):
+ _test_matrix_ndarray_taichi_scope_struct_for(layout)
+
+
+@pytest.mark.parametrize('layout', layouts)
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_vector_ndarray_python_scope(layout):
+ a = ti.Vector.ndarray(10, ti.i32, 5, layout=layout)
+ for i in range(5):
+ for j in range(4):
+ a[i][j * j] = j * j
+ assert a[0][9] == 9
+ assert a[1][0] == 0
+ assert a[2][1] == 1
+ assert a[3][4] == 4
+ assert a[4][9] == 9
+
+
+@pytest.mark.parametrize('layout', layouts)
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_vector_ndarray_taichi_scope(layout):
+ @ti.kernel
+ def func(a: ti.any_arr()):
+ for i in range(5):
+ for j in range(4):
+ a[i][j * j] = j * j
+
+ v = ti.Vector.ndarray(10, ti.i32, 5, layout=layout)
+ func(v)
+ assert v[0][9] == 9
+ assert v[1][0] == 0
+ assert v[2][1] == 1
+ assert v[3][4] == 4
+ assert v[4][9] == 9
+
+
+# number of compiled functions
+
+
+def _test_compiled_functions():
+ @ti.kernel
+ def func(a: ti.any_arr(element_dim=1)):
+ for i in range(5):
+ for j in range(4):
+ a[i][j * j] = j * j
+
+ v = ti.Vector.ndarray(10, ti.i32, 5)
+ func(v)
+ assert impl.get_runtime().get_num_compiled_functions() == 1
+ v = np.zeros((6, 10), dtype=np.int32)
+ func(v)
+ assert impl.get_runtime().get_num_compiled_functions() == 1
+ v = np.zeros((6, 11), dtype=np.int32)
+ func(v)
+ assert impl.get_runtime().get_num_compiled_functions() == 2
+ v = ti.Vector.ndarray(10, ti.i32, 5, layout=ti.Layout.SOA)
+ func(v)
+ assert impl.get_runtime().get_num_compiled_functions() == 3
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_compiled_functions():
+ _test_compiled_functions()
+
+
+# annotation compatibility
+
+
+def _test_arg_not_match():
+ @ti.kernel
+ def func1(a: ti.any_arr(element_dim=1)):
+ pass
+
+ x = ti.Matrix.ndarray(2, 3, ti.i32, shape=(4, 7))
+ with pytest.raises(
+ ValueError,
+ match=
+ r'Invalid argument into ti\.any_arr\(\) - required element_dim=1, but .* is provided'
+ ):
+ func1(x)
+
+ @ti.kernel
+ def func2(a: ti.any_arr(element_dim=2)):
+ pass
+
+ x = ti.Vector.ndarray(2, ti.i32, shape=(4, 7))
+ with pytest.raises(
+ ValueError,
+ match=
+ r'Invalid argument into ti\.any_arr\(\) - required element_dim=2, but .* is provided'
+ ):
+ func2(x)
+
+ @ti.kernel
+ def func3(a: ti.any_arr(layout=ti.Layout.AOS)):
+ pass
+
+ x = ti.Matrix.ndarray(2, 3, ti.i32, shape=(4, 7), layout=ti.Layout.SOA)
+ with pytest.raises(
+ ValueError,
+ match=
+ r'Invalid argument into ti\.any_arr\(\) - required layout=Layout\.AOS, but .* is provided'
+ ):
+ func3(x)
+
+ @ti.kernel
+ def func4(a: ti.any_arr(layout=ti.Layout.SOA)):
+ pass
+
+ x = ti.Vector.ndarray(2, ti.i32, shape=(4, 7))
+ with pytest.raises(
+ ValueError,
+ match=
+ r'Invalid argument into ti\.any_arr\(\) - required layout=Layout\.SOA, but .* is provided'
+ ):
+ func4(x)
+
+ @ti.kernel
+ def func5(a: ti.any_arr(element_shape=(2, 3))):
+ pass
+
+ x = ti.Vector.ndarray(2, ti.i32, shape=(4, 7))
+ with pytest.raises(
+ ValueError,
+ match=
+ r'Invalid argument into ti\.any_arr\(\) - required element_dim'):
+ func5(x)
+
+ with pytest.raises(
+ ValueError,
+ match=r'Both element_shape and element_dim are specified'):
+
+ @ti.kernel
+ def func6(a: ti.any_arr(element_dim=1, element_shape=(2, 3))):
+ pass
+
+ @ti.kernel
+ def func7(a: ti.any_arr(field_dim=2)):
+ pass
+
+ x = ti.ndarray(ti.i32, shape=(3, ))
+ with pytest.raises(
+ ValueError,
+ match=r'Invalid argument into ti\.any_arr\(\) - required field_dim'
+ ):
+ func7(x)
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_arg_not_match():
+ _test_arg_not_match()
+
+
+def _test_size_in_bytes():
+ a = ti.ndarray(ti.i32, 8)
+ assert a._get_element_size() == 4
+ assert a._get_nelement() == 8
+
+ b = ti.Vector.ndarray(10, ti.f64, 5)
+ assert b._get_element_size() == 8
+ assert b._get_nelement() == 50
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_size_in_bytes():
+ _test_size_in_bytes()
+
+
+@test_utils.test(arch=supported_archs_taichi_ndarray)
+def test_different_shape():
+ n1 = 4
+ x = ti.ndarray(dtype=ti.f32, shape=(n1, n1))
+
+ @ti.kernel
+ def init(d: ti.i32, arr: ti.any_arr()):
+ for i, j in arr:
+ arr[i, j] = d
+
+ init(2, x)
+ assert (x.to_numpy() == (np.ones(shape=(n1, n1)) * 2)).all()
+ n2 = 8
+ y = ti.ndarray(dtype=ti.f32, shape=(n2, n2))
+ init(3, y)
+ assert (y.to_numpy() == (np.ones(shape=(n2, n2)) * 3)).all()
diff --git a/tests/_python_orig/test_ndrange.py b/tests/_python_orig/test_ndrange.py
new file mode 100644
index 000000000..22def2d89
--- /dev/null
+++ b/tests/_python_orig/test_ndrange.py
@@ -0,0 +1,248 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_1d():
+ x = ti.field(ti.f32, shape=(16))
+
+ @ti.kernel
+ def func():
+ for i in ti.ndrange((4, 10)):
+ x[i] = i
+
+ func()
+
+ for i in range(16):
+ if 4 <= i < 10:
+ assert x[i] == i
+ else:
+ assert x[i] == 0
+
+
+@test_utils.test()
+def test_2d():
+ x = ti.field(ti.f32, shape=(16, 32))
+
+ t = 8
+
+ @ti.kernel
+ def func():
+ for i, j in ti.ndrange((4, 10), (3, t)):
+ val = i + j * 10
+ x[i, j] = val
+
+ func()
+ for i in range(16):
+ for j in range(32):
+ if 4 <= i < 10 and 3 <= j < 8:
+ assert x[i, j] == i + j * 10
+ else:
+ assert x[i, j] == 0
+
+
+@test_utils.test()
+def test_3d():
+ x = ti.field(ti.f32, shape=(16, 32, 64))
+
+ @ti.kernel
+ def func():
+ for i, j, k in ti.ndrange((4, 10), (3, 8), 17):
+ x[i, j, k] = i + j * 10 + k * 100
+
+ func()
+ for i in range(16):
+ for j in range(32):
+ for k in range(64):
+ if 4 <= i < 10 and 3 <= j < 8 and k < 17:
+ assert x[i, j, k] == i + j * 10 + k * 100
+ else:
+ assert x[i, j, k] == 0
+
+
+@test_utils.test()
+def test_tensor_based_3d():
+ x = ti.field(ti.i32, shape=(6, 6, 6))
+ y = ti.field(ti.i32, shape=(6, 6, 6))
+
+ @ti.kernel
+ def func():
+ lower = ti.Vector([0, 1, 2])
+ upper = ti.Vector([3, 4, 5])
+ for I in ti.grouped(
+ ti.ndrange((lower[0], upper[0]), (lower[1], upper[1]),
+ (lower[2], upper[2]))):
+ x[I] = I[0] + I[1] + I[2]
+ for i in range(0, 3):
+ for j in range(1, 4):
+ for k in range(2, 5):
+ y[i, j, k] = i + j + k
+
+ func()
+
+ for i in range(6):
+ for j in range(6):
+ for k in range(6):
+ assert x[i, j, k] == y[i, j, k]
+
+
+@test_utils.test()
+def test_static_grouped():
+ x = ti.field(ti.f32, shape=(16, 32, 64))
+
+ @ti.kernel
+ def func():
+ for I in ti.static(ti.grouped(ti.ndrange((4, 5), (3, 5), 5))):
+ x[I] = I[0] + I[1] * 10 + I[2] * 100
+
+ func()
+ for i in range(16):
+ for j in range(32):
+ for k in range(64):
+ if 4 <= i < 5 and 3 <= j < 5 and k < 5:
+ assert x[i, j, k] == i + j * 10 + k * 100
+ else:
+ assert x[i, j, k] == 0
+
+
+@test_utils.test()
+def test_static_grouped_static():
+ x = ti.Matrix.field(2, 3, dtype=ti.f32, shape=(16, 4))
+
+ @ti.kernel
+ def func():
+ for i, j in ti.ndrange(16, 4):
+ for I in ti.static(ti.grouped(ti.ndrange(2, 3))):
+ x[i, j][I] = I[0] + I[1] * 10 + i + j * 4
+
+ func()
+ for i in range(16):
+ for j in range(4):
+ for k in range(2):
+ for l in range(3):
+ assert x[i, j][k, l] == k + l * 10 + i + j * 4
+
+
+@test_utils.test()
+def test_field_init_eye():
+ # https://github.com/taichi-dev/taichi/issues/1824
+
+ n = 32
+
+ A = ti.field(ti.f32, (n, n))
+
+ @ti.kernel
+ def init():
+ for i, j in ti.ndrange(n, n):
+ if i == j:
+ A[i, j] = 1
+
+ init()
+ assert np.allclose(A.to_numpy(), np.eye(n, dtype=np.float32))
+
+
+@test_utils.test()
+def test_ndrange_index_floordiv():
+ # https://github.com/taichi-dev/taichi/issues/1829
+
+ n = 10
+
+ A = ti.field(ti.f32, (n, n))
+
+ @ti.kernel
+ def init():
+ for i, j in ti.ndrange(n, n):
+ if i // 2 == 0:
+ A[i, j] = i
+
+ init()
+ for i in range(n):
+ for j in range(n):
+ if i // 2 == 0:
+ assert A[i, j] == i
+ else:
+ assert A[i, j] == 0
+
+
+@test_utils.test()
+def test_nested_ndrange():
+ # https://github.com/taichi-dev/taichi/issues/1829
+
+ n = 2
+
+ A = ti.field(ti.i32, (n, n, n, n))
+
+ @ti.kernel
+ def init():
+ for i, j in ti.ndrange(n, n):
+ for k, l in ti.ndrange(n, n):
+ r = i * n**3 + j * n**2 + k * n + l
+ A[i, j, k, l] = r
+
+ init()
+ for i in range(n):
+ for j in range(n):
+ for k in range(n):
+ for l in range(n):
+ r = i * n**3 + j * n**2 + k * n + l
+ assert A[i, j, k, l] == r
+
+
+@test_utils.test(ti.cpu)
+def test_ndrange_ast_transform():
+ n, u, v = 4, 3, 2
+
+ a = ti.field(ti.i32, ())
+ b = ti.field(ti.i32, ())
+ A = ti.field(ti.i32, (n, n))
+
+ @ti.kernel
+ def func():
+ # `__getitem__ cannot be called from Python-scope` will be raised if
+ # `a[None]` is not transformed to `ti.subscript(a, None)` in ti.ndrange:
+ for i, j in ti.ndrange(a[None], b[None]):
+ r = i * n + j + 1
+ A[i, j] = r
+
+ a[None] = u
+ b[None] = v
+
+ func()
+
+ for i in range(n):
+ for j in range(n):
+ if i < u and j < v:
+ r = i * n + j + 1
+ else:
+ r = 0
+ assert A[i, j] == r
+
+
+@test_utils.test()
+def test_grouped_ndrange_star():
+ @ti.kernel
+ def foo() -> ti.i32:
+ ret = 0
+ for I in ti.grouped(ti.ndrange(*[[1, 3]] * 3)):
+ ret += I[0] + I[1] + I[2]
+ return ret
+
+ assert foo() == 36
+
+
+@test_utils.test()
+def test_ndrange_three_arguments():
+ @ti.kernel
+ def foo():
+ for i in ti.ndrange((1, 2, 3)):
+ pass
+
+ with pytest.raises(
+ ti.TaichiSyntaxError,
+ match=
+ r"Every argument of ndrange should be a scalar or a tuple/list like \(begin, end\)"
+ ):
+ foo()
diff --git a/tests/_python_orig/test_nested_kernel_error.py b/tests/_python_orig/test_nested_kernel_error.py
new file mode 100644
index 000000000..605432263
--- /dev/null
+++ b/tests/_python_orig/test_nested_kernel_error.py
@@ -0,0 +1,18 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_nested_kernel_error():
+ @ti.kernel
+ def B():
+ pass
+
+ @ti.kernel
+ def A():
+ B()
+
+ with pytest.raises(ti.TaichiCompilationError):
+ A()
diff --git a/tests/_python_orig/test_new_allocator.py b/tests/_python_orig/test_new_allocator.py
new file mode 100644
index 000000000..361f9af04
--- /dev/null
+++ b/tests/_python_orig/test_new_allocator.py
@@ -0,0 +1,82 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_1d():
+ N = 16
+
+ x = ti.field(ti.f32, shape=(N, ))
+ y = ti.field(ti.f32, shape=(N, ))
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ y[i] = x[i]
+
+ for i in range(N):
+ x[i] = i * 2
+
+ func()
+
+ for i in range(N):
+ assert y[i] == i * 2
+
+
+@test_utils.test()
+def test_3d():
+ N = 2
+ M = 2
+
+ x = ti.field(ti.f32, shape=(N, M))
+ y = ti.field(ti.f32, shape=(N, M))
+
+ @ti.kernel
+ def func():
+ for I in ti.grouped(x):
+ y[I] = x[I]
+
+ for i in range(N):
+ for j in range(M):
+ x[i, j] = i * 10 + j
+
+ func()
+
+ for i in range(N):
+ for j in range(M):
+ assert y[i, j] == i * 10 + j
+
+
+@test_utils.test()
+def test_matrix():
+ N = 16
+
+ x = ti.Matrix.field(2, 2, dtype=ti.f32, shape=(N, ), layout=ti.Layout.AOS)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ x[i][1, 1] = x[i][0, 0]
+
+ for i in range(N):
+ x[i][0, 0] = i + 3
+
+ func()
+
+ for i in range(N):
+ assert x[i][1, 1] == i + 3
+
+
+@test_utils.test()
+def test_alloc_in_kernel():
+    return  # build bots may not have this much memory to run this test...
+ x = ti.field(ti.f32)
+
+ ti.root.pointer(ti.i, 8192).dense(ti.i, 1024 * 1024).place(x)
+
+ @ti.kernel
+ def touch():
+ for i in range(4096):
+ x[i * 1024 * 1024] = 1
+
+ touch()
diff --git a/tests/_python_orig/test_no_activate.py b/tests/_python_orig/test_no_activate.py
new file mode 100644
index 000000000..43311f2c1
--- /dev/null
+++ b/tests/_python_orig/test_no_activate.py
@@ -0,0 +1,30 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_no_activate():
+ x = ti.field(ti.f32)
+
+ n = 1024
+
+ d = ti.root.dynamic(ti.i, n, chunk_size=32)
+ d.place(x)
+
+ @ti.kernel
+ def initialize():
+ for i in range(n):
+ x[i] = 1
+
+ @ti.kernel
+ def func():
+ ti.no_activate(d)
+ for i in range(n // 2):
+ x[i * 2 + 1] += 1
+
+ initialize()
+
+ func()
+
+ for i in range(n):
+ assert x[i] == i % 2 + 1
diff --git a/tests/_python_orig/test_no_grad.py b/tests/_python_orig/test_no_grad.py
new file mode 100644
index 000000000..e89275e0b
--- /dev/null
+++ b/tests/_python_orig/test_no_grad.py
@@ -0,0 +1,45 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_no_grad():
+ x = ti.field(ti.f32)
+ loss = ti.field(ti.f32)
+
+ N = 1
+
+ # no gradients allocated for x
+ ti.root.dense(ti.i, N).place(x)
+ ti.root.place(loss, loss.grad)
+
+ @ti.kernel
+ def func():
+ for i in range(N):
+ ti.atomic_add(loss[None], x[i]**2)
+
+ with ti.Tape(loss):
+ func()
+
+
+@test_utils.test()
+def test_raise_no_gradient():
+ y = ti.field(shape=(), name='y', dtype=ti.f32, needs_grad=True)
+ x = ti.field(shape=(), name='x', dtype=ti.f32)
+ z = np.array([1.0])
+
+ @ti.kernel
+ def func(x: ti.template()):
+ y[None] = x.grad[None] * x.grad[None]
+ z[0] = x.grad[None]
+
+ x[None] = 5.
+ with pytest.raises(
+ ti.TaichiCompilationError,
+ match=
+ 'Gradient x.grad has not been placed, check whether `needs_grad=True`'
+ ):
+ func(x)
diff --git a/tests/_python_orig/test_non_taichi_types_in_kernel.py b/tests/_python_orig/test_non_taichi_types_in_kernel.py
new file mode 100644
index 000000000..96ed9032e
--- /dev/null
+++ b/tests/_python_orig/test_non_taichi_types_in_kernel.py
@@ -0,0 +1,20 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_subscript_user_classes_in_kernel():
+ class MyList:
+ def __init__(self, elements):
+ self.elements = elements
+
+ def __getitem__(self, index):
+ return self.elements[index]
+
+ @ti.kernel
+ def func():
+ for i in ti.static(range(3)):
+ print(a[i])
+
+ a = MyList([1, 2, 3])
+ func()
diff --git a/tests/_python_orig/test_numpy.py b/tests/_python_orig/test_numpy.py
new file mode 100644
index 000000000..58019da6e
--- /dev/null
+++ b/tests/_python_orig/test_numpy.py
@@ -0,0 +1,222 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+def with_data_type(dt):
+ val = ti.field(ti.i32)
+
+ n = 4
+
+ ti.root.dense(ti.i, n).place(val)
+
+ @ti.kernel
+ def test_numpy(arr: ti.ext_arr()):
+ for i in range(n):
+ arr[i] = arr[i]**2
+
+ a = np.array([4, 8, 1, 24], dtype=dt)
+
+ for i in range(n):
+ a[i] = i * 2
+
+ test_numpy(a)
+
+ for i in range(n):
+ assert a[i] == i * i * 4
+
+
+@test_utils.test()
+def test_numpy_f32():
+ with_data_type(np.float32)
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_numpy_f64():
+ with_data_type(np.float64)
+
+
+@test_utils.test()
+def test_numpy_i32():
+ with_data_type(np.int32)
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_numpy_i64():
+ with_data_type(np.int64)
+
+
+@test_utils.test()
+def test_numpy_2d():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.i, n).dense(ti.j, m).place(val)
+
+ @ti.kernel
+ def test_numpy(arr: ti.ext_arr()):
+ for i in range(n):
+ for j in range(m):
+ arr[i, j] += i + j
+
+ a = np.empty(shape=(n, m), dtype=np.int32)
+
+ for i in range(n):
+ for j in range(m):
+ a[i, j] = i * j
+
+ test_numpy(a)
+
+ for i in range(n):
+ for j in range(m):
+ assert a[i, j] == i * j + i + j
+
+
+@test_utils.test()
+def test_numpy_2d_transpose():
+ val = ti.field(ti.i32)
+
+ n = 8
+ m = 8
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ @ti.kernel
+ def test_numpy(arr: ti.ext_arr()):
+ for i in ti.grouped(val):
+ val[i] = arr[i]
+
+ a = np.empty(shape=(n, m), dtype=np.int32)
+
+ for i in range(n):
+ for j in range(m):
+ a[i, j] = i * j + i * 4
+
+ test_numpy(a.transpose())
+
+ for i in range(n):
+ for j in range(m):
+ assert val[i, j] == i * j + j * 4
+
+
+@test_utils.test()
+def test_numpy_3d():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 7
+ p = 11
+
+ ti.root.dense(ti.i, n).dense(ti.j, m).dense(ti.k, p).place(val)
+
+ @ti.kernel
+ def test_numpy(arr: ti.ext_arr()):
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ arr[i, j, k] += i + j + k * 2
+
+ a = np.empty(shape=(n, m, p), dtype=np.int32)
+
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ a[i, j, k] = i * j * (k + 1)
+
+ test_numpy(a)
+
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ assert a[i, j, k] == i * j * (k + 1) + i + j + k * 2
+
+
+@test_utils.test()
+def test_numpy_3d_error():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 7
+ p = 11
+
+ ti.root.dense(ti.i, n).dense(ti.j, m).dense(ti.k, p).place(val)
+
+ @ti.kernel
+ def test_numpy(arr: ti.ext_arr()):
+ for i in range(n):
+ for j in range(m):
+ for k in range(p):
+ arr[i, j] += i + j + k * 2
+
+ a = np.empty(shape=(n, m, p), dtype=np.int32)
+
+ with pytest.raises(ti.TaichiCompilationError):
+ test_numpy(a)
+
+
+@test_utils.test()
+def test_numpy_multiple_external_arrays():
+
+ n = 4
+
+ @ti.kernel
+ def test_numpy(a: ti.ext_arr(), b: ti.ext_arr()):
+ for i in range(n):
+ a[i] = a[i] * b[i]
+ b[i] = a[i] + b[i]
+
+ a = np.array([4, 8, 1, 24], dtype=np.int32)
+ b = np.array([5, 6, 12, 3], dtype=np.int32)
+ c = a * b
+ d = c + b
+
+ test_numpy(a, b)
+ for i in range(n):
+ assert a[i] == c[i]
+ assert b[i] == d[i]
+
+
+@test_utils.test()
+def test_index_mismatch():
+ with pytest.raises(AssertionError):
+ val = ti.field(ti.i32, shape=(1, 2, 3))
+ val[0, 0] = 1
+
+
+@test_utils.test()
+def test_numpy_zero():
+ @ti.kernel
+ def test_numpy(arr: ti.ext_arr()):
+ pass
+
+ test_numpy(np.empty(shape=(0), dtype=np.int32))
+ test_numpy(np.empty(shape=(0, 5), dtype=np.int32))
+ test_numpy(np.empty(shape=(5, 0), dtype=np.int32))
+
+
+@test_utils.test()
+def test_numpy_struct_for():
+ @ti.kernel
+ def func1(a: ti.any_arr()):
+ for i, j in a:
+ a[i, j] = i + j
+
+ m = np.zeros((123, 456), dtype=np.int32)
+ func1(m)
+ for i in range(123):
+ for j in range(456):
+ assert m[i, j] == i + j
+
+ @ti.kernel
+ def func2(a: ti.any_arr()):
+ for I in ti.grouped(a):
+ a[I] = I.sum()
+
+ n = np.zeros((98, 76, 54), dtype=np.int32)
+ func2(n)
+ for i, j, k in ti.ndrange(98, 76, 54):
+ assert n[i, j, k] == i + j + k
diff --git a/tests/_python_orig/test_numpy_io.py b/tests/_python_orig/test_numpy_io.py
new file mode 100644
index 000000000..e2795ff8a
--- /dev/null
+++ b/tests/_python_orig/test_numpy_io.py
@@ -0,0 +1,162 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_to_numpy_2d():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ for i in range(n):
+ for j in range(m):
+ val[i, j] = i + j * 3
+
+ arr = val.to_numpy()
+
+ assert arr.shape == (4, 7)
+ for i in range(n):
+ for j in range(m):
+ assert arr[i, j] == i + j * 3
+
+
+@test_utils.test()
+def test_from_numpy_2d():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ arr = np.empty(shape=(n, m), dtype=np.int32)
+
+ for i in range(n):
+ for j in range(m):
+ arr[i, j] = i + j * 3
+
+ val.from_numpy(arr)
+
+ for i in range(n):
+ for j in range(m):
+ assert val[i, j] == i + j * 3
+
+
+@test_utils.test()
+def test_to_numpy_struct():
+ n = 16
+ f = ti.Struct.field({"a": ti.i32, "b": ti.f32}, shape=(n, ))
+
+ for i in range(n):
+ f[i].a = i
+ f[i].b = f[i].a * 2
+
+ arr_dict = f.to_numpy()
+
+ for i in range(n):
+ assert arr_dict["a"][i] == i
+ assert arr_dict["b"][i] == i * 2
+
+
+@test_utils.test()
+def test_from_numpy_struct():
+ n = 16
+ f = ti.Struct.field({"a": ti.i32, "b": ti.f32}, shape=(n, ))
+
+ arr_dict = {
+ "a": np.arange(n, dtype=np.int32),
+ "b": np.arange(n, dtype=np.int32) * 2,
+ }
+
+ f.from_numpy(arr_dict)
+
+ for i in range(n):
+ assert f[i].a == i
+ assert f[i].b == i * 2
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_f64():
+ val = ti.field(ti.f64)
+
+ n = 4
+ m = 7
+
+ ti.root.dense(ti.ij, (n, m)).place(val)
+
+ for i in range(n):
+ for j in range(m):
+ val[i, j] = (i + j * 3) * 1e100
+
+ val.from_numpy(val.to_numpy() * 2)
+
+ for i in range(n):
+ for j in range(m):
+ assert val[i, j] == (i + j * 3) * 2e100
+
+
+@test_utils.test()
+def test_matrix():
+ n = 4
+ m = 7
+ val = ti.Matrix.field(2, 3, ti.f32, shape=(n, m))
+
+ nparr = np.empty(shape=(n, m, 2, 3), dtype=np.float32)
+ for i in range(n):
+ for j in range(m):
+ for k in range(2):
+ for l in range(3):
+ nparr[i, j, k, l] = i + j * 2 - k - l * 3
+
+ val.from_numpy(nparr)
+ new_nparr = val.to_numpy()
+ assert (nparr == new_nparr).all()
+
+
+@test_utils.test()
+def test_numpy_io_example():
+ n = 4
+ m = 7
+
+ # Taichi tensors
+ val = ti.field(ti.i32, shape=(n, m))
+ vec = ti.Vector.field(3, dtype=ti.i32, shape=(n, m))
+ mat = ti.Matrix.field(3, 4, dtype=ti.i32, shape=(n, m))
+
+ # Scalar
+ arr = np.ones(shape=(n, m), dtype=np.int32)
+ val.from_numpy(arr)
+ arr = val.to_numpy()
+
+ # Vector
+ arr = np.ones(shape=(n, m, 3), dtype=np.int32)
+ vec.from_numpy(arr)
+
+ arr = np.ones(shape=(n, m, 3, 1), dtype=np.int32)
+ vec.from_numpy(arr)
+
+ arr = np.ones(shape=(n, m, 1, 3), dtype=np.int32)
+ vec.from_numpy(arr)
+
+ arr = vec.to_numpy()
+ assert arr.shape == (n, m, 3)
+
+ arr = vec.to_numpy(keep_dims=True)
+ assert arr.shape == (n, m, 3, 1)
+
+ # Matrix
+ arr = np.ones(shape=(n, m, 3, 4), dtype=np.int32)
+ mat.from_numpy(arr)
+
+ arr = mat.to_numpy()
+ assert arr.shape == (n, m, 3, 4)
+
+ arr = mat.to_numpy(keep_dims=True)
+ assert arr.shape == (n, m, 3, 4)
+
+ # For PyTorch tensors, use to_torch/from_torch instead
diff --git a/tests/_python_orig/test_offload.py b/tests/_python_orig/test_offload.py
new file mode 100644
index 000000000..f1ebfe7bd
--- /dev/null
+++ b/tests/_python_orig/test_offload.py
@@ -0,0 +1,84 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_running_loss():
+ return
+ steps = 16
+
+ total_loss = ti.field(ti.f32)
+ running_loss = ti.field(ti.f32)
+ additional_loss = ti.field(ti.f32)
+
+ ti.root.place(total_loss)
+ ti.root.dense(ti.i, steps).place(running_loss)
+ ti.root.place(additional_loss)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def compute_loss():
+ total_loss[None] = 0.0
+ for i in range(steps):
+ ti.atomic_add(total_loss[None], running_loss[i] * 2)
+ ti.atomic_add(total_loss[None], additional_loss[None] * 3)
+
+ compute_loss()
+
+ assert total_loss.grad[None] == 1
+ for i in range(steps):
+ assert running_loss[i] == 2
+ assert additional_loss.grad[None] == 3
+
+
+@test_utils.test()
+def test_reduce_separate():
+ a = ti.field(ti.f32, shape=(16))
+ b = ti.field(ti.f32, shape=(4))
+ c = ti.field(ti.f32, shape=())
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def reduce1():
+ for i in range(16):
+ b[i // 4] += a[i]
+
+ @ti.kernel
+ def reduce2():
+ for i in range(4):
+ c[None] += b[i]
+
+ c.grad[None] = 1
+ reduce2.grad()
+ reduce1.grad()
+
+ for i in range(4):
+ assert b.grad[i] == 1
+ for i in range(16):
+ assert a.grad[i] == 1
+
+
+@test_utils.test()
+def test_reduce_merged():
+ a = ti.field(ti.f32, shape=(16))
+ b = ti.field(ti.f32, shape=(4))
+ c = ti.field(ti.f32, shape=())
+
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def reduce():
+ for i in range(16):
+ b[i // 4] += a[i]
+
+ for i in range(4):
+ c[None] += b[i]
+
+ c.grad[None] = 1
+ reduce.grad()
+
+ for i in range(4):
+ assert b.grad[i] == 1
+ for i in range(16):
+ assert a.grad[i] == 1
diff --git a/tests/_python_orig/test_offload_cross.py b/tests/_python_orig/test_offload_cross.py
new file mode 100644
index 000000000..4a2b6d313
--- /dev/null
+++ b/tests/_python_orig/test_offload_cross.py
@@ -0,0 +1,135 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_offload_with_cross_block_locals():
+ ret = ti.field(ti.f32)
+
+ ti.root.place(ret)
+
+ @ti.kernel
+ def ker():
+ s = 0
+ for i in range(10):
+ s += i
+ ret[None] = s
+
+ ker()
+
+ assert ret[None] == 45
+
+
+@test_utils.test()
+def test_offload_with_cross_block_locals2():
+ ret = ti.field(ti.f32)
+
+ ti.root.place(ret)
+
+ @ti.kernel
+ def ker():
+ s = 0
+ for i in range(10):
+ s += i
+ ret[None] = s
+ s = ret[None] * 2
+ for i in range(10):
+ ti.atomic_add(ret[None], s)
+
+ ker()
+
+ assert ret[None] == 45 * 21
+
+
+@test_utils.test()
+def test_offload_with_cross_block_locals3():
+ ret = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def ker():
+ s = 1
+ t = s
+ for i in range(10):
+ s += i
+ ret[None] = t
+
+ ker()
+
+ assert ret[None] == 1
+
+
+@test_utils.test()
+def test_offload_with_cross_block_locals4():
+ ret = ti.field(ti.f32, shape=())
+
+ @ti.kernel
+ def ker():
+ a = 1
+ b = 0
+ for i in range(10):
+ b += a
+ ret[None] = b
+
+ ker()
+
+ assert ret[None] == 10
+
+
+@test_utils.test()
+def test_offload_with_flexible_bounds():
+ s = ti.field(ti.i32, shape=())
+ lower = ti.field(ti.i32, shape=())
+ upper = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def ker():
+ for i in range(lower[None], upper[None]):
+ s[None] += i
+
+ lower[None] = 10
+ upper[None] = 20
+ ker()
+
+ assert s[None] == 29 * 10 // 2
+
+
+@test_utils.test()
+def test_offload_with_cross_block_globals():
+ ret = ti.field(ti.f32)
+
+ ti.root.place(ret)
+
+ @ti.kernel
+ def ker():
+ ret[None] = 0
+ for i in range(10):
+ ret[None] += i
+ ret[None] += 1
+
+ ker()
+
+ assert ret[None] == 46
+
+
+@test_utils.test()
+def test_offload_with_cross_nested_for():
+ @ti.kernel
+ def run(a: ti.i32):
+ b = a + 1
+ for x in range(1):
+ for i in range(b):
+ print('OK')
+
+ run(2)
+
+
+@test_utils.test()
+def test_offload_with_cross_if_inside_for():
+ @ti.kernel
+ def run(a: ti.i32):
+ b = a > 2
+ for x in range(1):
+ if b:
+ print('OK')
+
+ run(2)
diff --git a/tests/_python_orig/test_offset.py b/tests/_python_orig/test_offset.py
new file mode 100644
index 000000000..fd41b732a
--- /dev/null
+++ b/tests/_python_orig/test_offset.py
@@ -0,0 +1,141 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_accessor():
+ a = ti.field(dtype=ti.i32)
+
+ ti.root.dense(ti.ijk, 128).place(a, offset=(1024, 2048, 2100))
+
+ a[1029, 2100, 2200] = 1
+ assert a[1029, 2100, 2200] == 1
+
+
+@test_utils.test()
+def test_struct_for_huge_offsets():
+ a = ti.field(dtype=ti.i32)
+
+ offset = 1024, 2048, 2100, 2200
+ ti.root.dense(ti.ijkl, 4).place(a, offset=offset)
+
+ @ti.kernel
+ def test():
+ for i, j, k, l in a:
+ a[i, j, k, l] = i + j * 10 + k * 100 + l * 1000
+
+ test()
+
+ for i in range(offset[0], offset[0] + 4):
+ for j in range(offset[1], offset[1] + 4):
+ for k in range(offset[2], offset[2] + 4):
+ for l in range(offset[3], offset[3] + 4):
+ assert a[i, j, k, l] == i + j * 10 + k * 100 + l * 1000
+
+
+@test_utils.test()
+def test_struct_for_negative():
+ a = ti.field(dtype=ti.i32)
+
+ offset = 16, -16
+ ti.root.dense(ti.ij, 32).place(a, offset=offset)
+
+ @ti.kernel
+ def test():
+ for i, j in a:
+ a[i, j] = i + j * 10
+
+ test()
+
+ for i in range(16, 48):
+ for j in range(-16, 16):
+ assert a[i, j] == i + j * 10
+
+
+@test_utils.test()
+def test_offset_for_var():
+ a = ti.field(dtype=ti.i32, shape=16, offset=-48)
+ b = ti.field(dtype=ti.i32, shape=(16, ), offset=(16, ))
+ c = ti.field(dtype=ti.i32, shape=(16, 64), offset=(-16, -64))
+ d = ti.field(dtype=ti.i32, shape=(16, 64), offset=None)
+
+ offset = 4, -4
+ shape = 16, 16
+ e = ti.field(dtype=ti.i32, shape=shape, offset=offset)
+
+ @ti.kernel
+ def test():
+ for i, j in e:
+ e[i, j] = i * j
+
+ test()
+ for i in range(4, 20):
+ for j in range(-4, 12):
+ assert e[i, j] == i * j
+
+
+@test_utils.test()
+def test_offset_for_vector():
+ a = ti.field(dtype=ti.i32, shape=16, offset=-48)
+ b = ti.field(dtype=ti.i32, shape=16, offset=None)
+
+ offset = 16
+ shape = 16
+ c = ti.Vector.field(n=1, dtype=ti.i32, shape=shape, offset=offset)
+
+ @ti.kernel
+ def test():
+ for i in c:
+ c[i][0] = 2 * i
+
+ test()
+ for i in range(offset, offset + shape, 1):
+ assert c[i][0] == 2 * i
+
+
+@test_utils.test()
+def test_offset_for_matrix():
+ a = ti.Matrix.field(3,
+ 3,
+ shape=(16, 16),
+ offset=(-16, 16),
+ dtype=ti.float32)
+
+ @ti.kernel
+ def test():
+ for i, j in a:
+ for m in range(3):
+ a[i, j][0, 0] = i + j
+
+ test()
+
+ for i in range(-16, 0):
+ for j in range(16, 32):
+ assert a[i, j][0, 0] == i + j
+
+
+@test_utils.test()
+def test_offset_must_throw_var():
+ with pytest.raises(AssertionError):
+ a = ti.field(dtype=ti.float32, shape=3, offset=(3, 4))
+ b = ti.field(dtype=ti.float32, shape=None, offset=(3, 4))
+
+
+@test_utils.test()
+def test_offset_must_throw_vector():
+ with pytest.raises(AssertionError):
+ a = ti.Vector.field(3, dtype=ti.float32, shape=3, offset=(3, 4))
+ b = ti.Vector.field(3, dtype=ti.float32, shape=None, offset=(3, ))
+
+
+@test_utils.test()
+def test_offset_must_throw_matrix():
+ with pytest.raises(AssertionError):
+ c = ti.Matrix.field(3,
+ 3,
+ dtype=ti.i32,
+ shape=(32, 16, 8),
+ offset=(32, 16))
+ d = ti.Matrix.field(3, 3, dtype=ti.i32, shape=None, offset=(32, 16))
diff --git a/tests/_python_orig/test_oop.py b/tests/_python_orig/test_oop.py
new file mode 100644
index 000000000..059d774f9
--- /dev/null
+++ b/tests/_python_orig/test_oop.py
@@ -0,0 +1,266 @@
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_classfunc():
+ @ti.data_oriented
+ class Array2D:
+ def __init__(self, n, m):
+ self.n = n
+ self.m = m
+ self.val = ti.field(ti.f32, shape=(n, m))
+
+ @ti.func
+ def inc(self, i, j):
+ self.val[i, j] += i * j
+
+ @ti.func
+ def mul(self, i, j):
+ return i * j
+
+ @ti.kernel
+ def fill(self):
+ for i, j in self.val:
+ self.inc(i, j)
+ self.val[i, j] += self.mul(i, j)
+
+ arr = Array2D(128, 128)
+
+ arr.fill()
+
+ for i in range(arr.n):
+ for j in range(arr.m):
+ assert arr.val[i, j] == i * j * 2
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_oop():
+ @ti.data_oriented
+ class Array2D:
+ def __init__(self, n, m, increment):
+ self.n = n
+ self.m = m
+ self.val = ti.field(ti.f32)
+ self.total = ti.field(ti.f32)
+ self.increment = increment
+
+ ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
+ ti.root.place(self.total)
+
+ @ti.kernel
+ def inc(self):
+ for i, j in self.val:
+ self.val[i, j] += self.increment
+
+ @ti.kernel
+ def inc2(self, increment: ti.i32):
+ for i, j in self.val:
+ self.val[i, j] += increment
+
+ @ti.kernel
+ def reduce(self):
+ for i, j in self.val:
+ self.total[None] += self.val[i, j] * 4
+
+ arr = Array2D(128, 128, 3)
+
+ double_total = ti.field(ti.f32)
+
+ ti.root.place(double_total)
+ ti.root.lazy_grad()
+
+ arr.inc()
+ arr.inc.grad()
+ assert arr.val[3, 4] == 3
+ arr.inc2(4)
+ assert arr.val[3, 4] == 7
+
+ with ti.Tape(loss=arr.total):
+ arr.reduce()
+
+ for i in range(arr.n):
+ for j in range(arr.m):
+ assert arr.val.grad[i, j] == 4
+
+ @ti.kernel
+ def double():
+ double_total[None] = 2 * arr.total[None]
+
+ with ti.Tape(loss=double_total):
+ arr.reduce()
+ double()
+
+ for i in range(arr.n):
+ for j in range(arr.m):
+ assert arr.val.grad[i, j] == 8
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_oop_two_items():
+ @ti.data_oriented
+ class Array2D:
+ def __init__(self, n, m, increment, multiplier):
+ self.n = n
+ self.m = m
+ self.val = ti.field(ti.f32)
+ self.total = ti.field(ti.f32)
+ self.increment = increment
+ self.multiplier = multiplier
+ ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
+ ti.root.place(self.total)
+
+ @ti.kernel
+ def inc(self):
+ for i, j in self.val:
+ self.val[i, j] += self.increment
+
+ @ti.kernel
+ def reduce(self):
+ for i, j in self.val:
+ self.total[None] += self.val[i, j] * self.multiplier
+
+ arr1_inc, arr1_mult = 3, 4
+ arr2_inc, arr2_mult = 6, 8
+ arr1 = Array2D(128, 128, arr1_inc, arr1_mult)
+ arr2 = Array2D(16, 32, arr2_inc, arr2_mult)
+
+ ti.root.lazy_grad()
+
+ arr1.inc()
+ arr1.inc.grad()
+ arr2.inc()
+ arr2.inc.grad()
+ assert arr1.val[3, 4] == arr1_inc
+ assert arr2.val[8, 6] == arr2_inc
+
+ with ti.Tape(loss=arr1.total):
+ arr1.reduce()
+ with ti.Tape(loss=arr2.total, clear_gradients=False):
+ arr2.reduce()
+ for i in range(arr1.n):
+ for j in range(arr1.m):
+ assert arr1.val.grad[i, j] == arr1_mult
+ for i in range(arr2.n):
+ for j in range(arr2.m):
+ assert arr2.val.grad[i, j] == arr2_mult
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_oop_inherit_ok():
+    # Array1D inherits from object, so the call stack shows 'class Array1D(object)'
+    # instead of '@ti.data_oriented'. Make sure this also works.
+ @ti.data_oriented
+ class Array1D(object):
+ def __init__(self, n, mul):
+ self.n = n
+ self.val = ti.field(ti.f32)
+ self.total = ti.field(ti.f32)
+ self.mul = mul
+ ti.root.dense(ti.ij, (self.n, )).place(self.val)
+ ti.root.place(self.total)
+
+ @ti.kernel
+ def reduce(self):
+ for i, j in self.val:
+ self.total[None] += self.val[i, j] * self.mul
+
+ arr = Array1D(128, 42)
+
+ ti.root.lazy_grad()
+
+ with ti.Tape(loss=arr.total):
+ arr.reduce()
+ for i in range(arr.n):
+ for j in range(arr.n):
+ assert arr.val.grad[i, j] == 42
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_oop_class_must_be_data_oriented():
+ class Array1D(object):
+ def __init__(self, n, mul):
+ self.n = n
+ self.val = ti.field(ti.f32)
+ self.total = ti.field(ti.f32)
+ self.mul = mul
+ ti.root.dense(ti.ij, (self.n, )).place(self.val)
+ ti.root.place(self.total)
+
+ @ti.kernel
+ def reduce(self):
+ for i, j in self.val:
+ self.total[None] += self.val[i, j] * self.mul
+
+ arr = Array1D(128, 42)
+
+ ti.root.lazy_grad()
+
+ # Array1D is not properly decorated, this will raise an Exception
+ with pytest.raises(ti.TaichiSyntaxError):
+ arr.reduce()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_hook():
+ @ti.data_oriented
+ class Solver:
+ def __init__(self, n, m, hook):
+ self.val = ti.field(ti.f32, shape=(n, m))
+ self.hook = hook
+
+ def run_hook(self):
+ self.hook(self.val)
+
+ @ti.kernel
+ def hook(x: ti.template()):
+ for i, j in x:
+ x[i, j] = 1.0
+
+ solver = Solver(32, 32, hook)
+ solver.run_hook()
+
+ for i in range(32):
+ for j in range(32):
+ assert (solver.val[i, j] == 1.0)
+
+
+@test_utils.test()
+def test_oop_with_portery_decorator():
+ @ti.data_oriented
+ class TestPortery:
+ @property
+ @ti.kernel
+ def kernel_property(self) -> ti.i32:
+ return 42
+
+ @property
+ def raw_proterty(self):
+ return 3
+
+ a = TestPortery()
+ assert a.kernel_property == 42
+
+ assert a.raw_proterty == 3
+
+
+@test_utils.test()
+def test_oop_with_static_decorator():
+ @ti.data_oriented
+ class TestStatic:
+ @staticmethod
+ @ti.kernel
+ def kernel_static() -> ti.i32:
+ return 42
+
+ @staticmethod
+ def raw_static():
+ return 3
+
+ a = TestStatic()
+ assert a.kernel_static() == 42
+
+ assert a.raw_static() == 3
diff --git a/tests/_python_orig/test_optimization.py b/tests/_python_orig/test_optimization.py
new file mode 100644
index 000000000..258fd41fa
--- /dev/null
+++ b/tests/_python_orig/test_optimization.py
@@ -0,0 +1,147 @@
+from taichi.lang.misc import serialize
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_advanced_store_forwarding_nested_loops():
+ val = ti.field(ti.i32)
+ ti.root.place(val)
+
+ @ti.kernel
+ def func():
+ # If we want to do store-forwarding to local loads inside loops,
+ # we should pass the last local store into the loop, rather than use
+ # an empty AllocaOptimize loop.
+ # See https://github.com/taichi-dev/taichi/pull/849.
+ a = val[None]
+ for i in range(1):
+ for j in range(1):
+ val[None] = a
+
+ val[None] = 10
+ func()
+ assert val[None] == 10
+
+
+@test_utils.test()
+def test_advanced_unused_store_elimination_if():
+ val = ti.field(ti.i32)
+ ti.root.place(val)
+
+ @ti.kernel
+ def func():
+ a = 1
+ if val[None]:
+ a = 2
+ if val[None]:
+ a = 3
+ else:
+ a = 4
+ val[None] = a
+ else:
+ val[None] = a
+
+ val[None] = 0
+ func()
+ assert val[None] == 1
+ func()
+ assert val[None] == 3
+
+
+@test_utils.test()
+def test_local_store_in_nested_for_and_if():
+ # See https://github.com/taichi-dev/taichi/pull/862.
+ val = ti.field(ti.i32, shape=(3, 3, 3))
+
+ @ti.kernel
+ def func():
+ serialize()
+ for i, j, k in val:
+ if i < 2 and j < 2 and k < 2:
+ a = 0
+ for di, dj, dk in ti.ndrange((0, 2), (0, 2), (0, 2)):
+ if val[i + di, j + dj, k + dk] == 1:
+ a = val[i + di, j + dj, k + dk]
+
+ for di, dj, dk in ti.ndrange((0, 2), (0, 2), (0, 2)):
+ val[i + di, j + dj, k + dk] = a
+
+ val[1, 1, 1] = 1
+ func()
+
+ for i in range(3):
+ for j in range(3):
+ for k in range(3):
+ assert (val[i, j, k] == 1)
+
+
+@test_utils.test()
+def test_advanced_store_forwarding_continue_in_if():
+ val = ti.field(ti.i32)
+ ti.root.place(val)
+
+ @ti.kernel
+ def func(n: ti.i32):
+ # Launch just one thread
+ for _ in range(1):
+ a = 10
+ b = 0
+ for i in range(n):
+ b += a
+ a = i
+ if i == 5:
+ continue
+ a = 100
+ a = 1000
+ val[None] = a + b
+
+ func(1)
+ assert val[None] == 1010
+ func(5)
+ assert val[None] == 1410
+ func(6)
+ assert val[None] == 1510
+ func(7)
+ assert val[None] == 1515
+
+
+@test_utils.test()
+def test_advanced_store_elimination_in_loop():
+ val = ti.field(ti.i32)
+ ti.root.place(val)
+
+ @ti.kernel
+ def func():
+ # Launch just one thread
+ for _ in range(1):
+ a = 1
+ for i in range(5):
+ b = 1
+ val[None] = a + b
+ b = 0
+ a = 2
+ a = 3
+ a = 4
+ val[None] += a
+
+ func()
+ assert val[None] == 8
+
+
+@test_utils.test()
+def test_parallel_assignment():
+ mat = ti.field(ti.i32, shape=(3, 4))
+
+ @ti.kernel
+ def func():
+ c = 0
+ for i in ti.static(range(4)):
+ mat[0, c], mat[1, c], mat[2, c] = 1, 2, 3
+ c += 1
+
+ func()
+ for i in range(3):
+ for j in range(4):
+ assert mat[i, j] == i + 1
diff --git a/tests/_python_orig/test_packed_size.py b/tests/_python_orig/test_packed_size.py
new file mode 100644
index 000000000..e52c0e5e9
--- /dev/null
+++ b/tests/_python_orig/test_packed_size.py
@@ -0,0 +1,10 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.packed, packed=True)
+def test_packed_size():
+ x = ti.field(ti.i32)
+ ti.root.dense(ti.i, 17).dense(ti.ijk, 129).place(x)
+ assert x.shape == (17 * 129, 129, 129)
+ assert x.snode.parent().parent()._cell_size_bytes == 4 * 129**3
diff --git a/tests/_python_orig/test_parallel_range_for.py b/tests/_python_orig/test_parallel_range_for.py
new file mode 100644
index 000000000..429a28092
--- /dev/null
+++ b/tests/_python_orig/test_parallel_range_for.py
@@ -0,0 +1,22 @@
+import taichi as ti
+from tests import test_utils
+
+
+# such small block_dim will cause grid_dim too large for OpenGL...
+@test_utils.test(exclude=ti.opengl)
+def test_parallel_range_for():
+ n = 1024 * 1024
+ val = ti.field(ti.i32, shape=(n))
+
+ @ti.kernel
+ def fill():
+ ti.parallelize(8)
+ ti.block_dim(8)
+ for i in range(n):
+ val[i] = i
+
+ fill()
+ # To speed up
+ val_np = val.to_numpy()
+ for i in range(n):
+ assert val_np[i] == i
diff --git a/tests/_python_orig/test_pow.py b/tests/_python_orig/test_pow.py
new file mode 100644
index 000000000..8ee43894a
--- /dev/null
+++ b/tests/_python_orig/test_pow.py
@@ -0,0 +1,48 @@
+import taichi as ti
+from tests import test_utils
+
+
+def _test_pow_f(dt):
+ z = ti.field(dt, shape=())
+
+ @ti.kernel
+ def func(x: dt, y: dt):
+ z[None] = x**y
+
+ for x in [0.5, 1, 1.5, 2, 6.66]:
+ for y in [-2, -1, -0.3, 0, 0.5, 1, 1.4, 2.6]:
+ func(x, y)
+ assert abs(z[None] / x**y - 1) < 0.00001
+
+
+def _test_pow_i(dt):
+ z = ti.field(dt, shape=())
+
+ @ti.kernel
+ def func(x: dt, y: ti.template()):
+ z[None] = x**y
+
+ for x in range(-5, 5):
+ for y in range(0, 4):
+ func(x, y)
+ assert z[None] == x**y
+
+
+@test_utils.test()
+def test_pow_f32():
+ _test_pow_f(ti.f32)
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_pow_f64():
+ _test_pow_f(ti.f64)
+
+
+@test_utils.test()
+def test_pow_i32():
+ _test_pow_i(ti.i32)
+
+
+@test_utils.test(require=ti.extension.data64)
+def test_pow_i64():
+ _test_pow_i(ti.i64)
diff --git a/tests/_python_orig/test_print.py b/tests/_python_orig/test_print.py
new file mode 100644
index 000000000..3e62d1919
--- /dev/null
+++ b/tests/_python_orig/test_print.py
@@ -0,0 +1,158 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+# Not really testable..
+# Just making sure it does not crash
+# Metal doesn't support print() or 64-bit data
+# While OpenGL does support print, but not 64-bit data
+@pytest.mark.parametrize('dt', [ti.i32, ti.f32, ti.i64, ti.f64])
+@test_utils.test(exclude=[ti.metal, ti.opengl, ti.vulkan])
+def test_print(dt):
+ @ti.kernel
+ def func():
+ print(ti.cast(1234.5, dt))
+
+ func()
+ # Discussion: https://github.com/taichi-dev/taichi/issues/1063#issuecomment-636421904
+ # Synchronize to prevent cross-test failure of print:
+ ti.sync()
+
+
+# TODO: As described by @k-ye above, what we want to ensure
+# is that, the content shows on console is *correct*.
+@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan
+def test_multi_print():
+ @ti.kernel
+ def func(x: ti.i32, y: ti.f32):
+ print(x, 1234.5, y)
+
+ func(666, 233.3)
+ ti.sync()
+
+
+@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan
+def test_print_string():
+ @ti.kernel
+ def func(x: ti.i32, y: ti.f32):
+ # make sure `%` doesn't break vprintf:
+ print('hello, world! %s %d %f', 233, y)
+ print('cool', x, 'well', y)
+
+ func(666, 233.3)
+ ti.sync()
+
+
+@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan
+def test_print_matrix():
+ x = ti.Matrix.field(2, 3, dtype=ti.f32, shape=())
+ y = ti.Vector.field(3, dtype=ti.f32, shape=3)
+
+ @ti.kernel
+ def func(k: ti.f32):
+ x[None][0, 0] = -1.0
+ y[2] += 1.0
+ print('hello', x[None], 'world!')
+ print(y[2] * k, x[None] / k, y[2])
+
+ func(233.3)
+ ti.sync()
+
+
+@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan
+def test_print_sep_end():
+ @ti.kernel
+ def func():
+ # hello 42 world!
+ print('hello', 42, 'world!')
+ # hello 42 Taichi 233 world!
+ print('hello', 42, 'Tai', end='')
+ print('chi', 233, 'world!')
+ # hello42world!
+ print('hello', 42, 'world!', sep='')
+ # ' ' (with no newline)
+ print(' ', end='')
+ # 'helloaswd42qwer'
+ print(' ', 42, sep='aswd', end='qwer')
+
+ func()
+ ti.sync()
+
+
+@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan
+def test_print_multiple_threads():
+ x = ti.field(dtype=ti.f32, shape=(128, ))
+
+ @ti.kernel
+ def func(k: ti.f32):
+ for i in x:
+ x[i] = i * k
+ print('x[', i, ']=', x[i])
+
+ func(0.1)
+ ti.sync()
+ func(10.0)
+ ti.sync()
+
+
+@test_utils.test(exclude=[ti.vulkan]) # TODO(changyu): enable ti.vulkan
+def test_print_list():
+ x = ti.Matrix.field(2, 3, dtype=ti.f32, shape=(2, 3))
+ y = ti.Vector.field(3, dtype=ti.f32, shape=())
+
+ @ti.kernel
+ def func(k: ti.f32):
+ w = [k, x.shape]
+ print(w + [y.n]) # [233.3, [2, 3], 3]
+ print(x.shape) # [2, 3]
+ print(y.shape) # []
+ z = (1, )
+ print([1, k**2, k + 1]) # [1, 233.3, 234.3]
+ print(z) # [1]
+ print([y[None], z]) # [[0, 0, 0], [1]]
+ print([]) # []
+
+ func(233.3)
+ ti.sync()
+
+
+@test_utils.test(arch=ti.cpu)
+def test_python_scope_print_field():
+ x = ti.Matrix.field(2, 3, dtype=ti.f32, shape=())
+ y = ti.Vector.field(3, dtype=ti.f32, shape=3)
+ z = ti.field(dtype=ti.f32, shape=3)
+
+ print(x)
+ print(y)
+ print(z)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_print_string_format():
+ @ti.kernel
+ def func(k: ti.f32):
+ print(123)
+ print("{} abc".format(123))
+ print("{} {} {}".format(1, 2, 3))
+ print("{} {name} {value}".format(k, name=999, value=123))
+ name = 123.4
+ value = 456.7
+ print("{} {name} {value}".format(k, name=name, value=value))
+
+ func(233.3)
+ ti.sync()
+
+
+@test_utils.test(arch=ti.cpu)
+def test_print_fstring():
+ def foo1(x):
+ return x + 1
+
+ @ti.kernel
+ def func(i: ti.i32, f: ti.f32):
+ print(f'qwe {foo1(1)} {foo1(2) * 2 - 1} {i} {f} {4} {True} {1.23}')
+
+ func(123, 4.56)
+ ti.sync()
diff --git a/tests/_python_orig/test_ptr_assign.py b/tests/_python_orig/test_ptr_assign.py
new file mode 100644
index 000000000..63e5a1e7d
--- /dev/null
+++ b/tests/_python_orig/test_ptr_assign.py
@@ -0,0 +1,107 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_ptr_scalar():
+ a = ti.field(dtype=ti.f32, shape=())
+
+ @ti.kernel
+ def func(t: ti.f32):
+ b = ti.static(a)
+ c = ti.static(b)
+ b[None] = b[None] * t
+ c[None] = a[None] + t
+
+ for x, y in zip(range(-5, 5), range(-4, 4)):
+ a[None] = x
+ func(y)
+ assert a[None] == x * y + y
+
+
+@test_utils.test()
+def test_ptr_matrix():
+ a = ti.Matrix.field(2, 2, dtype=ti.f32, shape=())
+
+ @ti.kernel
+ def func(t: ti.f32):
+ a[None] = [[2, 3], [4, 5]]
+ b = ti.static(a)
+ b[None][1, 0] = t
+
+ for x in range(-5, 5):
+ func(x)
+ assert a[None][1, 0] == x
+
+
+@test_utils.test()
+def test_ptr_field():
+ a = ti.field(dtype=ti.f32, shape=(3, 4))
+
+ @ti.kernel
+ def func(t: ti.f32):
+ b = ti.static(a)
+ b[1, 3] = b[1, 2] * t
+ b[2, 0] = b[2, 1] + t
+
+ for x, y in zip(range(-5, 5), range(-4, 4)):
+ a[1, 2] = x
+ a[2, 1] = x
+ func(y)
+ assert a[1, 3] == x * y
+ assert a[2, 0] == x + y
+
+
+@test_utils.test()
+def test_pythonish_tuple_assign():
+ a = ti.field(dtype=ti.f32, shape=())
+ b = ti.field(dtype=ti.f32, shape=())
+
+ @ti.kernel
+ def func(x: ti.f32, y: ti.f32):
+ u, v = ti.static(b, a)
+ u[None] = x
+ v[None] = y
+
+ for x, y in zip(range(-5, 5), range(-4, 4)):
+ func(x, y)
+ assert a[None] == y
+ assert b[None] == x
+
+
+@test_utils.test()
+def test_ptr_func():
+ a = ti.field(dtype=ti.f32, shape=())
+
+ @ti.func
+ def add2numbers(x, y):
+ return x + y
+
+ @ti.kernel
+ def func():
+ add = ti.static(add2numbers)
+ a[None] = add(2, 3)
+
+ func()
+ assert a[None] == 5.0
+
+
+@test_utils.test()
+def test_ptr_class_func():
+ @ti.data_oriented
+ class MyClass:
+ def __init__(self):
+ self.a = ti.field(dtype=ti.f32, shape=())
+
+ @ti.func
+ def add2numbers(self, x, y):
+ return x + y
+
+ @ti.kernel
+ def func(self):
+ a, add = ti.static(self.a, self.add2numbers)
+ a[None] = add(2, 3)
+
+ obj = MyClass()
+ obj.func()
+ assert obj.a[None] == 5.0
diff --git a/tests/_python_orig/test_random.py b/tests/_python_orig/test_random.py
new file mode 100644
index 000000000..fd609b1cb
--- /dev/null
+++ b/tests/_python_orig/test_random.py
@@ -0,0 +1,170 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_random_float():
+ for precision in [ti.f32, ti.f64]:
+ ti.init()
+ n = 1024
+ x = ti.field(ti.f32, shape=(n, n))
+
+ @ti.kernel
+ def fill():
+ for i in range(n):
+ for j in range(n):
+ x[i, j] = ti.random(precision)
+
+ fill()
+ X = x.to_numpy()
+ for i in range(1, 4):
+ assert (X**i).mean() == test_utils.approx(1 / (i + 1), rel=1e-2)
+
+
+@test_utils.test()
+def test_random_int():
+ for precision in [ti.i32, ti.i64]:
+ ti.init()
+ n = 1024
+ x = ti.field(ti.f32, shape=(n, n))
+
+ @ti.kernel
+ def fill():
+ for i in range(n):
+ for j in range(n):
+ v = ti.random(precision)
+ if precision == ti.i32:
+ x[i, j] = (float(v) + float(2**31)) / float(2**32)
+ else:
+ x[i, j] = (float(v) + float(2**63)) / float(2**64)
+
+ fill()
+ X = x.to_numpy()
+ for i in range(1, 4):
+ assert (X**i).mean() == test_utils.approx(1 / (i + 1), rel=1e-2)
+
+
+@test_utils.test()
+def test_random_independent_product():
+ n = 1024
+ x = ti.field(ti.f32, shape=n * n)
+
+ @ti.kernel
+ def fill():
+ for i in range(n * n):
+ a = ti.random()
+ b = ti.random()
+ x[i] = a * b
+
+ fill()
+ X = x.to_numpy()
+ for i in range(4):
+ assert X.mean() == test_utils.approx(1 / 4, rel=1e-2)
+
+
+@test_utils.test()
+def test_random_2d_dist():
+ n = 8192
+
+ x = ti.Vector.field(2, dtype=ti.f32, shape=n)
+
+ @ti.kernel
+ def gen():
+ for i in range(n):
+ x[i] = ti.Vector([ti.random(), ti.random()])
+
+ gen()
+
+ X = x.to_numpy()
+ counters = [0 for _ in range(4)]
+ for i in range(n):
+ c = int(X[i, 0] < 0.5) * 2 + int(X[i, 1] < 0.5)
+ counters[c] += 1
+
+ for c in range(4):
+ assert counters[c] / n == test_utils.approx(1 / 4, rel=0.2)
+
+
+@test_utils.test()
+def test_random_seed_per_launch():
+ n = 10
+ x = ti.field(ti.f32, shape=n)
+
+ @ti.kernel
+ def gen(i: ti.i32):
+ x[i] = ti.random()
+
+ count = 0
+ gen(0)
+ for i in range(1, n):
+ gen(i)
+ count += 1 if x[i] == x[i - 1] else 0
+
+ assert count <= n * 0.15
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.metal])
+def test_random_seed_per_program():
+ import numpy as np
+ n = 10
+ result = []
+ for s in [0, 1]:
+ ti.init(random_seed=s)
+ x = ti.field(ti.f32, shape=n)
+
+ @ti.kernel
+ def gen():
+ for i in x:
+ x[i] = ti.random()
+
+ gen()
+ result.append(x.to_numpy())
+ ti.reset()
+
+ assert not np.allclose(result[0], result[1])
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_random_f64():
+ '''
+ Tests the granularity of float64 random numbers.
+ See https://github.com/taichi-dev/taichi/issues/2251 for an explanation.
+ '''
+ import numpy as np
+ n = int(2**23)
+ x = ti.field(ti.f64, shape=n)
+
+ @ti.kernel
+ def foo():
+ for i in x:
+ x[i] = ti.random(dtype=ti.f64)
+
+ foo()
+ frac, _ = np.modf(x.to_numpy() * 4294967296)
+ assert np.max(frac) > 0
+
+
+@test_utils.test()
+def test_randn():
+ '''
+ Tests the generation of Gaussian random numbers.
+ '''
+ for precision in [ti.f32, ti.f64]:
+ ti.init()
+ n = 1024
+ x = ti.field(ti.f32, shape=(n, n))
+
+ @ti.kernel
+ def fill():
+ for i in range(n):
+ for j in range(n):
+ x[i, j] = ti.randn(precision)
+
+ fill()
+ X = x.to_numpy()
+
+ # https://en.wikipedia.org/wiki/Normal_distribution#Moments
+ moments = [0.0, 1.0, 0.0, 3.0]
+ for i in range(4):
+ assert (X**(i + 1)).mean() == test_utils.approx(moments[i],
+ abs=3e-2)
diff --git a/tests/_python_orig/test_reduction.py b/tests/_python_orig/test_reduction.py
new file mode 100644
index 000000000..434a1dc58
--- /dev/null
+++ b/tests/_python_orig/test_reduction.py
@@ -0,0 +1,146 @@
+import numpy as np
+import pytest
+from pytest import approx
+
+import taichi as ti
+from tests import test_utils
+
+OP_ADD = 0
+OP_MIN = 1
+OP_MAX = 2
+OP_AND = 3
+OP_OR = 4
+OP_XOR = 5
+
+ti_ops = {
+ OP_ADD: ti.atomic_add,
+ OP_MIN: ti.atomic_min,
+ OP_MAX: ti.atomic_max,
+ OP_AND: ti.atomic_and,
+ OP_OR: ti.atomic_or,
+ OP_XOR: ti.atomic_xor
+}
+
+np_ops = {
+ OP_ADD: np.sum,
+ OP_MIN: lambda a: a.min(),
+ OP_MAX: lambda a: a.max(),
+ OP_AND: np.bitwise_and.reduce,
+ OP_OR: np.bitwise_or.reduce,
+ OP_XOR: np.bitwise_xor.reduce
+}
+
+
+def _test_reduction_single(dtype, criterion, op):
+ N = 1024 * 1024
+ if (ti.lang.impl.current_cfg().arch == ti.opengl or
+ ti.lang.impl.current_cfg().arch == ti.vulkan) and dtype == ti.f32:
+ # OpenGL/Vulkan are not capable of such large number in its float32...
+ N = 1024 * 16
+
+ a = ti.field(dtype, shape=N)
+ tot = ti.field(dtype, shape=())
+
+ if dtype in [ti.f32, ti.f64]:
+
+ @ti.kernel
+ def fill():
+ for i in a:
+ a[i] = i + 0.5
+ else:
+
+ @ti.kernel
+ def fill():
+ for i in a:
+ a[i] = i + 1
+
+ ti_op = ti_ops[op]
+
+ @ti.kernel
+ def reduce():
+ for i in a:
+ ti_op(tot[None], a[i])
+
+ @ti.kernel
+ def reduce_tmp() -> dtype:
+ s = ti.zero(tot[None]) if op == OP_ADD or op == OP_XOR else a[0]
+ for i in a:
+ ti_op(s, a[i])
+ return s
+
+ fill()
+ tot[None] = 0 if op in [OP_ADD, OP_XOR] else a[0]
+ reduce()
+ tot2 = reduce_tmp()
+
+ np_arr = a.to_numpy()
+ ground_truth = np_ops[op](np_arr)
+
+ assert criterion(tot[None], ground_truth)
+ assert criterion(tot2, ground_truth)
+
+
+@pytest.mark.parametrize('op', [OP_ADD, OP_MIN, OP_MAX, OP_AND, OP_OR, OP_XOR])
+@test_utils.test()
+def test_reduction_single_i32(op):
+ _test_reduction_single(ti.i32, lambda x, y: x % 2**32 == y % 2**32, op)
+
+
+@pytest.mark.parametrize('op', [OP_ADD])
+@test_utils.test(exclude=ti.opengl)
+def test_reduction_single_u32(op):
+ _test_reduction_single(ti.u32, lambda x, y: x % 2**32 == y % 2**32, op)
+
+
+@pytest.mark.parametrize('op', [OP_ADD, OP_MIN, OP_MAX])
+@test_utils.test()
+def test_reduction_single_f32(op):
+ _test_reduction_single(ti.f32, lambda x, y: x == approx(y, 3e-4), op)
+
+
+@pytest.mark.parametrize('op', [OP_ADD])
+@test_utils.test(require=ti.extension.data64)
+def test_reduction_single_i64(op):
+ _test_reduction_single(ti.i64, lambda x, y: x % 2**64 == y % 2**64, op)
+
+
+@pytest.mark.parametrize('op', [OP_ADD])
+@test_utils.test(exclude=ti.opengl, require=ti.extension.data64)
+def test_reduction_single_u64(op):
+ _test_reduction_single(ti.u64, lambda x, y: x % 2**64 == y % 2**64, op)
+
+
+@pytest.mark.parametrize('op', [OP_ADD])
+@test_utils.test(require=ti.extension.data64)
+def test_reduction_single_f64(op):
+ _test_reduction_single(ti.f64, lambda x, y: x == approx(y, 1e-12), op)
+
+
+@test_utils.test()
+def test_reduction_different_scale():
+ @ti.kernel
+ def func(n: ti.template()) -> ti.i32:
+ x = 0
+ for i in range(n):
+ ti.atomic_add(x, 1)
+ return x
+
+ # 10 and 60 since OpenGL TLS stride size = 32
+ # 1024 and 100000 since OpenGL max threads per group ~= 1792
+ for n in [1, 10, 60, 1024, 100000]:
+ assert n == func(n)
+
+
+@test_utils.test()
+def test_reduction_any_arr():
+ @ti.kernel
+ def reduce(a: ti.any_arr()) -> ti.i32:
+ s = 0
+ for i in a:
+ ti.atomic_add(s, a[i])
+ ti.atomic_sub(s, 2)
+ return s
+
+ n = 1024
+ x = np.ones(n, dtype=np.int32)
+ assert reduce(x) == -n
diff --git a/tests/_python_orig/test_rescale.py b/tests/_python_orig/test_rescale.py
new file mode 100644
index 000000000..abf95da4e
--- /dev/null
+++ b/tests/_python_orig/test_rescale.py
@@ -0,0 +1,36 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_rescale():
+ a = ti.field(ti.f32)
+ b = ti.field(ti.f32)
+ ti.root.dense(ti.ij, 4).dense(ti.ij, 4).place(a)
+ ti.root.dense(ti.ij, 4).place(b)
+
+ @ti.kernel
+ def set_b():
+ for I in ti.grouped(a):
+ Ib = ti.rescale_index(a, b, I)
+ b[Ib] += 1.0
+
+ @ti.kernel
+ def set_a():
+ for I in ti.grouped(b):
+ Ia = ti.rescale_index(b, a, I)
+ a[Ia] = 1.0
+
+ set_a()
+ set_b()
+
+ for i in range(0, 4):
+ for j in range(0, 4):
+ assert b[i, j] == 16
+
+ for i in range(0, 16):
+ for j in range(0, 16):
+ if i % 4 == 0 and j % 4 == 0:
+ assert a[i, j] == 1
+ else:
+ assert a[i, j] == 0
diff --git a/tests/_python_orig/test_return.py b/tests/_python_orig/test_return.py
new file mode 100644
index 000000000..30bcc273d
--- /dev/null
+++ b/tests/_python_orig/test_return.py
@@ -0,0 +1,149 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_return_without_type_hint():
+ @ti.kernel
+ def kernel():
+ return 1
+
+ with pytest.raises(ti.TaichiCompilationError):
+ kernel()
+
+
+def test_const_func_ret():
+ ti.init()
+
+ @ti.kernel
+ def func1() -> ti.f32:
+ return 3
+
+ @ti.kernel
+ def func2() -> ti.i32:
+ return 3.3 # return type mismatch, will be auto-casted into ti.i32
+
+ assert func1() == test_utils.approx(3)
+ assert func2() == 3
+
+
+@test_utils.test()
+def _test_binary_func_ret(dt1, dt2, dt3, castor):
+ @ti.kernel
+ def func(a: dt1, b: dt2) -> dt3:
+ return a * b
+
+ if ti.types.is_integral(dt1):
+ xs = list(range(4))
+ else:
+ xs = [0.2, 0.4, 0.8, 1.0]
+
+ if ti.types.is_integral(dt2):
+ ys = list(range(4))
+ else:
+ ys = [0.2, 0.4, 0.8, 1.0]
+
+ for x, y in zip(xs, ys):
+ assert func(x, y) == test_utils.approx(castor(x * y))
+
+
+def test_binary_func_ret():
+ _test_binary_func_ret(ti.i32, ti.f32, ti.f32, float)
+ _test_binary_func_ret(ti.f32, ti.i32, ti.f32, float)
+ _test_binary_func_ret(ti.i32, ti.f32, ti.i32, int)
+ _test_binary_func_ret(ti.f32, ti.i32, ti.i32, int)
+
+
+@test_utils.test()
+def test_return_in_static_if():
+ @ti.kernel
+ def foo(a: ti.template()) -> ti.i32:
+ if ti.static(a == 1):
+ return 1
+ elif ti.static(a == 2):
+ return 2
+ return 3
+
+ assert foo(1) == 1
+ assert foo(2) == 2
+ assert foo(123) == 3
+
+
+@test_utils.test()
+def test_func_multiple_return():
+ @ti.func
+ def safe_sqrt(a):
+ if a > 0:
+ return ti.sqrt(a)
+ else:
+ return 0.0
+
+ @ti.kernel
+ def kern(a: float):
+ print(safe_sqrt(a))
+
+ with pytest.raises(
+ ti.TaichiCompilationError,
+ match='Return inside non-static if/for is not supported'):
+ kern(-233)
+
+
+@test_utils.test()
+def test_return_inside_static_for():
+ @ti.kernel
+ def foo() -> ti.i32:
+ a = 0
+ for i in ti.static(range(10)):
+ a += i * i
+ if ti.static(i == 8):
+ return a
+
+ assert foo() == 204
+
+
+@test_utils.test()
+def test_return_inside_non_static_for():
+ with pytest.raises(
+ ti.TaichiCompilationError,
+ match='Return inside non-static if/for is not supported'):
+
+ @ti.kernel
+ def foo() -> ti.i32:
+ for i in range(10):
+ return i
+
+ foo()
+
+
+@test_utils.test()
+def test_kernel_no_return():
+ with pytest.raises(
+ ti.TaichiSyntaxError,
+ match=
+ "Kernel has a return type but does not have a return statement"):
+
+ @ti.kernel
+ def foo() -> ti.i32:
+ pass
+
+ foo()
+
+
+@test_utils.test()
+def test_func_no_return():
+ with pytest.raises(
+ ti.TaichiCompilationError,
+ match=
+ "Function has a return type but does not have a return statement"):
+
+ @ti.func
+ def bar() -> ti.i32:
+ pass
+
+ @ti.kernel
+ def foo() -> ti.i32:
+ return bar()
+
+ foo()
diff --git a/tests/_python_orig/test_runtime.py b/tests/_python_orig/test_runtime.py
new file mode 100644
index 000000000..a397e0c60
--- /dev/null
+++ b/tests/_python_orig/test_runtime.py
@@ -0,0 +1,163 @@
+import copy
+import os
+import sys
+from contextlib import contextmanager
+
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@contextmanager
+def patch_os_environ_helper(custom_environ: dict, excludes: dict):
+    """
+    Temporarily patch os.environ for testing.
+    Originally created by @rexwangcc in test_cli.py
+    @archibate tweaked this method to be an os.environ patcher.
+
+    The patched environ will be:
+    custom_environ + (os.environ - excludes - custom_environ).
+
+    I.e.:
+
+    1. custom_environ could override os.environ.
+    2. os.environ keys matching excludes will not be included.
+
+    :parameter custom_environ:
+        Specify the base environment of patch, these values must
+        be included.
+
+    :parameter excludes:
+        When copying from os.environ, specify keys to be excluded.
+    """
+    environ = {}
+    for key in os.environ.keys():
+        if key not in excludes:
+            environ[key] = os.environ[key]
+    for key in custom_environ.keys():
+        environ[key] = custom_environ[key]
+    try:
+        cached_environ = os.environ
+        os.environ = environ  # bug fix: was `custom_environ`, which discarded the merged dict built above
+        yield os.environ
+    finally:
+        os.environ = cached_environ
+
+
+TF = [True, False]
+init_args = {
+ # 'key': [default, choices],
+ 'log_level': ['info', ['error', 'warn', 'info', 'debug', 'trace']],
+ 'gdb_trigger': [False, TF],
+ 'advanced_optimization': [True, TF],
+ 'debug': [False, TF],
+ 'print_ir': [False, TF],
+ 'verbose': [True, TF],
+ 'fast_math': [True, TF],
+ 'async_mode': [False, TF],
+ 'flatten_if': [False, TF],
+ 'simplify_before_lower_access': [True, TF],
+ 'simplify_after_lower_access': [True, TF],
+ 'kernel_profiler': [False, TF],
+ 'check_out_of_bound': [False, TF],
+ 'print_accessor_ir': [False, TF],
+ 'print_evaluator_ir': [False, TF],
+ 'print_struct_llvm_ir': [False, TF],
+ 'print_kernel_llvm_ir': [False, TF],
+ 'print_kernel_llvm_ir_optimized': [False, TF],
+ # FIXME: figure out why these two failed test:
+ #'device_memory_fraction': [0.0, [0.5, 1, 0]],
+ #'device_memory_GB': [1.0, [0.5, 1, 1.5, 2]],
+}
+
+env_configs = ['TI_' + key.upper() for key in init_args.keys()]
+
+special_init_cfgs = [
+ 'log_level',
+ 'gdb_trigger',
+]
+
+
+@pytest.mark.parametrize('key,values', init_args.items())
+def test_init_arg(key, values):
+ default, values = values
+
+ # helper function:
+ def test_arg(key, value, kwargs={}):
+ if key in special_init_cfgs:
+ spec_cfg = ti.init(_test_mode=True, **kwargs)
+ cfg = spec_cfg
+ else:
+ ti.init(**kwargs)
+ cfg = ti.lang.impl.current_cfg()
+ assert getattr(cfg, key) == value
+
+ with patch_os_environ_helper({}, excludes=env_configs):
+ # test if default value is correct:
+ test_arg(key, default)
+
+ # test if specified in argument:
+ for value in values:
+ kwargs = {key: value}
+ test_arg(key, value, kwargs)
+
+ # test if specified in environment:
+ env_key = 'TI_' + key.upper()
+ for value in values:
+ env_value = str(int(value) if isinstance(value, bool) else value)
+ environ = {env_key: env_value}
+ with patch_os_environ_helper(environ, excludes=env_configs):
+ test_arg(key, value)
+
+
+@pytest.mark.parametrize('arch', test_utils.expected_archs())
+def test_init_arch(arch):
+ with patch_os_environ_helper({}, excludes=['TI_ARCH']):
+ ti.init(arch=arch)
+ assert ti.lang.impl.current_cfg().arch == arch
+ with patch_os_environ_helper({'TI_ARCH': ti._lib.core.arch_name(arch)},
+ excludes=['TI_ARCH']):
+ ti.init(arch=ti.cc)
+ assert ti.lang.impl.current_cfg().arch == arch
+
+
+def test_init_bad_arg():
+ with pytest.raises(KeyError):
+ ti.init(_test_mode=True, debug=True, foo_bar=233)
+
+
+def test_init_require_version():
+ ti_core = ti._lib.utils.import_ti_core()
+ require_version = '{}.{}.{}'.format(ti_core.get_version_major(),
+ ti_core.get_version_minor(),
+ ti_core.get_version_patch())
+ ti.init(_test_mode=True, debug=True, require_version=require_version)
+
+
+def test_init_bad_require_version():
+ with pytest.raises(Exception):
+ ti_core = ti._lib.utils.import_ti_core()
+ bad_require_version = '{}.{}.{}'.format(
+ ti_core.get_version_major(), ti_core.get_version_minor(),
+ ti_core.get_version_patch() + 1)
+ ti.init(_test_mode=True,
+ debug=True,
+ require_version=bad_require_version)
+
+
+@pytest.mark.parametrize(
+ 'level', [ti.DEBUG, ti.TRACE, ti.INFO, ti.WARN, ti.ERROR, ti.CRITICAL])
+@test_utils.test()
+def test_supported_log_levels(level):
+ spec_cfg = ti.init(_test_mode=True, log_level=level)
+ assert spec_cfg.log_level == level
+
+
+@pytest.mark.parametrize(
+    'level', [ti.DEBUG, ti.TRACE, ti.INFO, ti.WARN, ti.ERROR, ti.CRITICAL])
+@test_utils.test()
+def test_set_logging_level(level):  # renamed: duplicated `test_supported_log_levels` shadowed the test above
+    spec_cfg = ti.init(_test_mode=True)
+    ti.set_logging_level(level)
+    assert ti._logging.is_logging_effective(level)
diff --git a/tests/_python_orig/test_scalar_op.py b/tests/_python_orig/test_scalar_op.py
new file mode 100644
index 000000000..1be760630
--- /dev/null
+++ b/tests/_python_orig/test_scalar_op.py
@@ -0,0 +1,192 @@
+import operator as ops
+
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+binary_func_table = [
+ (ops.add, ) * 2,
+ (ops.sub, ) * 2,
+ (ops.mul, ) * 2,
+ (ops.truediv, ) * 2,
+ (ops.floordiv, ) * 2,
+ (ops.mod, ) * 2,
+ (ops.pow, ) * 2,
+ (ops.and_, ) * 2,
+ (ops.or_, ) * 2,
+ (ops.xor, ) * 2,
+ (ops.eq, ) * 2,
+ (ops.ne, ) * 2,
+ (ops.lt, ) * 2,
+ (ops.le, ) * 2,
+ (ops.gt, ) * 2,
+ (ops.ge, ) * 2,
+ (ti.max, np.maximum),
+ (ti.min, np.minimum),
+ (ti.atan2, np.arctan2),
+]
+
+unary_func_table = [
+ (ops.neg, ) * 2,
+ (ops.invert, ) * 2,
+ (ti.lang.ops.logical_not, np.logical_not),
+ (ti.lang.ops.abs, np.abs),
+ (ti.exp, np.exp),
+ (ti.log, np.log),
+ (ti.sin, np.sin),
+ (ti.cos, np.cos),
+ (ti.tan, np.tan),
+ (ti.asin, np.arcsin),
+ (ti.acos, np.arccos),
+ (ti.tanh, np.tanh),
+ (ti.round, np.round),
+ (ti.floor, np.floor),
+ (ti.ceil, np.ceil),
+]
+
+
+@pytest.mark.parametrize('ti_func,np_func', binary_func_table)
+def test_python_scope_vector_binary(ti_func, np_func):
+ ti.init()
+ x = ti.Vector([2, 3])
+ y = ti.Vector([5, 4])
+
+ result = ti_func(x, y).to_numpy()
+ if ti_func in [ops.eq, ops.ne, ops.lt, ops.le, ops.gt, ops.ge]:
+ result = result.astype(bool)
+ expected = np_func(x.to_numpy(), y.to_numpy())
+ assert test_utils.allclose(result, expected)
+
+
+@pytest.mark.parametrize('ti_func,np_func', unary_func_table)
+def test_python_scope_vector_unary(ti_func, np_func):
+ ti.init()
+ x = ti.Vector([2, 3] if ti_func in
+ [ops.invert, ti.lang.ops.logical_not] else [0.2, 0.3])
+
+ result = ti_func(x).to_numpy()
+ if ti_func in [ti.lang.ops.logical_not]:
+ result = result.astype(bool)
+ expected = np_func(x.to_numpy())
+ assert test_utils.allclose(result, expected)
+
+
+def test_python_scope_matmul():
+ ti.init()
+ a = np.array([[1, 2], [3, 4]])
+ b = np.array([[5, 6], [7, 8]])
+ x = ti.Vector(a)
+ y = ti.Vector(b)
+
+ result = (x @ y).to_numpy()
+ expected = a @ b
+ assert test_utils.allclose(result, expected)
+
+
+def test_python_scope_linalg():
+    ti.init()
+    a = np.array([3, 4, -2])
+    b = np.array([-5, 0, 6])
+    x = ti.Vector(a)
+    y = ti.Vector(b)
+
+    assert test_utils.allclose(x.dot(y), np.dot(a, b))
+    assert test_utils.allclose(x.norm(), np.sqrt(np.dot(a, a)))
+    assert test_utils.allclose(x.normalized(), a / np.sqrt(np.dot(a, a)))
+    assert x.any() == 1  # NOTE(review): assertion expects 1, but old comment claimed -1 for True (Taichi IR convention) — verify intended python-scope value
+    assert y.all() == 0
+
+
+@test_utils.test(arch=[ti.x64, ti.cuda, ti.metal])
+def test_16_min_max():
+ @ti.kernel
+ def min_u16(a: ti.u16, b: ti.u16) -> ti.u16:
+ return ti.min(a, b)
+
+ @ti.kernel
+ def min_i16(a: ti.i16, b: ti.i16) -> ti.i16:
+ return ti.min(a, b)
+
+ @ti.kernel
+ def max_u16(a: ti.u16, b: ti.u16) -> ti.u16:
+ return ti.max(a, b)
+
+ @ti.kernel
+ def max_i16(a: ti.i16, b: ti.i16) -> ti.i16:
+ return ti.max(a, b)
+
+ a, b = 4, 2
+ assert min_u16(a, b) == min(a, b)
+ assert min_i16(a, b) == min(a, b)
+ assert max_u16(a, b) == max(a, b)
+ assert max_i16(a, b) == max(a, b)
+
+
+@test_utils.test(exclude=[ti.opengl, ti.cc])
+def test_32_min_max():
+ @ti.kernel
+ def min_u32(a: ti.u32, b: ti.u32) -> ti.u32:
+ return ti.min(a, b)
+
+ @ti.kernel
+ def min_i32(a: ti.i32, b: ti.i32) -> ti.i32:
+ return ti.min(a, b)
+
+ @ti.kernel
+ def max_u32(a: ti.u32, b: ti.u32) -> ti.u32:
+ return ti.max(a, b)
+
+ @ti.kernel
+ def max_i32(a: ti.i32, b: ti.i32) -> ti.i32:
+ return ti.max(a, b)
+
+ a, b = 4, 2
+ assert min_u32(a, b) == min(a, b)
+ assert min_i32(a, b) == min(a, b)
+ assert max_u32(a, b) == max(a, b)
+ assert max_i32(a, b) == max(a, b)
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_64_min_max():
+ @ti.kernel
+ def min_u64(a: ti.u64, b: ti.u64) -> ti.u64:
+ return ti.min(a, b)
+
+ @ti.kernel
+ def min_i64(a: ti.i64, b: ti.i64) -> ti.i64:
+ return ti.min(a, b)
+
+ @ti.kernel
+ def max_u64(a: ti.u64, b: ti.u64) -> ti.u64:
+ return ti.max(a, b)
+
+ @ti.kernel
+ def max_i64(a: ti.i64, b: ti.i64) -> ti.i64:
+ return ti.max(a, b)
+
+ a, b = 4, 2
+ assert min_u64(a, b) == min(a, b)
+ assert min_i64(a, b) == min(a, b)
+ assert max_u64(a, b) == max(a, b)
+ assert max_i64(a, b) == max(a, b)
+
+
+@test_utils.test()
+def test_min_max_vector_starred():
+ @ti.kernel
+ def min_starred() -> ti.i32:
+ a = ti.Vector([1, 2, 3])
+ b = ti.Vector([4, 5, 6])
+ return ti.min(*a, *b)
+
+ @ti.kernel
+ def max_starred() -> ti.i32:
+ a = ti.Vector([1, 2, 3])
+ b = ti.Vector([4, 5, 6])
+ return ti.max(*a, *b)
+
+ assert min_starred() == 1
+ assert max_starred() == 6
diff --git a/tests/_python_orig/test_scope_errors.py b/tests/_python_orig/test_scope_errors.py
new file mode 100644
index 000000000..ce6ef9231
--- /dev/null
+++ b/tests/_python_orig/test_scope_errors.py
@@ -0,0 +1,54 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_if():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ if True:
+ a = 0
+ else:
+ a = 1
+ print(a)
+
+ with pytest.raises(Exception):
+ func()
+
+
+@test_utils.test()
+def test_for():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(10):
+ a = i
+ print(a)
+
+ with pytest.raises(Exception):
+ func()
+
+
+@test_utils.test()
+def test_while():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ while True:
+ a = 0
+ print(a)
+
+ with pytest.raises(Exception):
+ func()
diff --git a/tests/_python_orig/test_serial_execution.py b/tests/_python_orig/test_serial_execution.py
new file mode 100644
index 000000000..5acd81502
--- /dev/null
+++ b/tests/_python_orig/test_serial_execution.py
@@ -0,0 +1,38 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cpu, cpu_max_num_threads=1)
+def test_serial_range_for():
+ n = 1024 * 32
+ s = ti.field(dtype=ti.i32, shape=n)
+ counter = ti.field(dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def fill_range():
+ counter[None] = 0
+ for i in range(n):
+ s[ti.atomic_add(counter[None], 1)] = i
+
+ fill_range()
+
+ for i in range(n):
+ assert s[i] == i
+
+
+@test_utils.test(arch=ti.cpu, cpu_max_num_threads=1)
+def test_serial_struct_for():
+ n = 1024 * 32
+ s = ti.field(dtype=ti.i32, shape=n)
+ counter = ti.field(dtype=ti.i32, shape=())
+
+ @ti.kernel
+ def fill_struct():
+ counter[None] = 0
+ for i in s:
+ s[ti.atomic_add(counter[None], 1)] = i
+
+ fill_struct()
+
+ for i in range(n):
+ assert s[i] == i
diff --git a/tests/_python_orig/test_sfg.py b/tests/_python_orig/test_sfg.py
new file mode 100644
index 000000000..edee8e287
--- /dev/null
+++ b/tests/_python_orig/test_sfg.py
@@ -0,0 +1,124 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=[ti.extension.async_mode, ti.extension.sparse],
+ async_mode=True)
+def test_remove_clear_list_from_fused_serial():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+ z = ti.field(ti.i32, shape=())
+
+ n = 32
+ ti.root.pointer(ti.i, n).dense(ti.i, 1).place(x)
+ ti.root.pointer(ti.i, n).dense(ti.i, 1).place(y)
+
+ @ti.kernel
+ def init_xy():
+ for i in range(n):
+ if i & 1:
+ x[i] = i
+ else:
+ y[i] = i
+
+ init_xy()
+ ti.sync()
+
+ stats = ti.tools.async_utils.get_kernel_stats()
+ stats.clear()
+
+ @ti.kernel
+ def inc(f: ti.template()):
+ for i in f:
+ f[i] += 1
+
+ @ti.kernel
+ def serial_z():
+ z[None] = 40
+ z[None] += 2
+
+ inc(x)
+ inc(y)
+ serial_z()
+ inc(x)
+ inc(y)
+ ti.sync()
+
+ counters = stats.get_counters()
+ # each of x and y has two listgens: root -> pointer -> dense
+ assert int(counters['launched_tasks_list_gen']) == 4
+ # clear list tasks have been fused into serial_z
+ assert int(counters['launched_tasks_serial']) == 1
+
+ xs = x.to_numpy()
+ ys = y.to_numpy()
+ for i in range(n):
+ if i & 1:
+ assert xs[i] == i + 2
+ assert ys[i] == 0
+ else:
+ assert ys[i] == i + 2
+ assert xs[i] == 0
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_sfg_dead_store_elimination():
+ n = 32
+
+ x = ti.field(dtype=float, shape=n, needs_grad=True)
+ total_energy = ti.field(dtype=float, shape=(), needs_grad=True)
+ unused = ti.field(dtype=float, shape=())
+
+ @ti.kernel
+ def gather():
+ for i in x:
+ e = x[i]**2
+ total_energy[None] += e
+
+ @ti.kernel
+ def scatter():
+ for i in x:
+ unused[None] += x[i]
+
+ xnp = np.arange(n, dtype=np.float32)
+ x.from_numpy(xnp)
+ ti.sync()
+
+ stats = ti.tools.async_utils.get_kernel_stats()
+ stats.clear()
+
+ for _ in range(5):
+ with ti.Tape(total_energy):
+ gather()
+ scatter()
+
+ ti.sync()
+ counters = stats.get_counters()
+
+ # gather() should be DSE'ed
+ assert counters['sfg_dse_tasks'] > 0
+
+ x_grad = x.grad.to_numpy()
+ for i in range(n):
+ assert test_utils.approx(x_grad[i]) == 2.0 * i
+
+
+@test_utils.test(require=ti.extension.async_mode, async_mode=True)
+def test_global_tmp_value_state():
+ # https://github.com/taichi-dev/taichi/issues/2024
+ n = 10
+ x = ti.field(ti.f32, shape=(n, ))
+
+ @ti.kernel
+ def compute_mean_of_boundary_edges() -> ti.i32:
+ total = 0.0
+ for i in range(n):
+ total += x[i] + x[i] * x[i]
+ result = total / ti.cast(n, ti.i32)
+ return result
+
+ x.from_numpy(np.arange(0, n, dtype=np.float32))
+ mean = compute_mean_of_boundary_edges()
+ assert test_utils.approx(mean) == 33
diff --git a/tests/_python_orig/test_snode_layout_inspection.py b/tests/_python_orig/test_snode_layout_inspection.py
new file mode 100644
index 000000000..ced2699a3
--- /dev/null
+++ b/tests/_python_orig/test_snode_layout_inspection.py
@@ -0,0 +1,58 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cpu)
+def test_primitives():
+ x = ti.field(dtype=ti.i16)
+ y = ti.field(dtype=ti.f32)
+ z = ti.field(dtype=ti.f64)
+
+ p = ti.field(dtype=ti.f32)
+ q = ti.field(dtype=ti.f32)
+ r = ti.field(dtype=ti.f64)
+
+ n1 = ti.root.dense(ti.i, 32)
+ n1.place(x)
+
+ n2 = ti.root.dense(ti.i, 32)
+ n2.place(y, z)
+
+ n3 = ti.root.dense(ti.i, 1)
+ n3.place(p, q, r)
+
+ assert n1._cell_size_bytes == 2
+ assert n2._cell_size_bytes in [12, 16]
+ assert n3._cell_size_bytes == 16
+
+ assert n1._offset_bytes_in_parent_cell == 0
+ assert n2._offset_bytes_in_parent_cell == 2 * 32
+ assert n3._offset_bytes_in_parent_cell in [
+ 2 * 32 + 12 * 32, 2 * 32 + 16 * 32
+ ]
+
+ assert x.snode._offset_bytes_in_parent_cell == 0
+ assert y.snode._offset_bytes_in_parent_cell == 0
+ assert z.snode._offset_bytes_in_parent_cell in [4, 8]
+ assert p.snode._offset_bytes_in_parent_cell == 0
+ assert q.snode._offset_bytes_in_parent_cell == 4
+ assert r.snode._offset_bytes_in_parent_cell == 8
+
+
+@test_utils.test(arch=ti.cpu)
+def test_bit_struct():
+ cit = ti.types.quantized_types.quant.int(16, False)
+ x = ti.field(dtype=cit)
+ y = ti.field(dtype=ti.types.quantized_types.type_factory.custom_float(
+ significand_type=cit))
+ z = ti.field(dtype=ti.f32)
+
+ n1 = ti.root.dense(ti.i, 32)
+ n1.bit_struct(num_bits=32).place(x)
+
+ n2 = ti.root.dense(ti.i, 4)
+ n2.bit_struct(num_bits=32).place(y)
+ n2.place(z)
+
+ assert n1._cell_size_bytes == 4
+ assert n2._cell_size_bytes == 8
diff --git a/tests/_python_orig/test_sort.py b/tests/_python_orig/test_sort.py
new file mode 100644
index 000000000..1eb3647e0
--- /dev/null
+++ b/tests/_python_orig/test_sort.py
@@ -0,0 +1,33 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.cc])
+def test_sort():
+ def test_sort_for_dtype(dtype, N):
+ keys = ti.field(dtype, N)
+ values = ti.field(dtype, N)
+
+ @ti.kernel
+ def fill():
+ for i in keys:
+ keys[i] = ti.random() * N
+ values[i] = keys[i]
+
+ fill()
+ ti._kernels.parallel_sort(keys, values)
+
+ keys_host = keys.to_numpy()
+ values_host = values.to_numpy()
+
+ for i in range(N):
+ if i < N - 1:
+ assert keys_host[i] <= keys_host[i + 1]
+ assert keys_host[i] == values_host[i]
+
+ test_sort_for_dtype(ti.i32, 1)
+ test_sort_for_dtype(ti.i32, 256)
+ test_sort_for_dtype(ti.i32, 100001)
+ test_sort_for_dtype(ti.f32, 1)
+ test_sort_for_dtype(ti.f32, 256)
+ test_sort_for_dtype(ti.f32, 100001)
diff --git a/tests/_python_orig/test_sparse_activate.py b/tests/_python_orig/test_sparse_activate.py
new file mode 100644
index 000000000..3c5d49820
--- /dev/null
+++ b/tests/_python_orig/test_sparse_activate.py
@@ -0,0 +1,57 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 16
+
+ ptr = ti.root.pointer(ti.i, n)
+ ptr.dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ s[None] = 0
+
+ @ti.kernel
+ def activate():
+ ti.activate(ptr, 1)
+ ti.activate(ptr, 32)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ activate()
+ func()
+ assert s[None] == 32
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_non_dfs_snode_order():
+ x = ti.field(dtype=ti.i32)
+ y = ti.field(dtype=ti.i32)
+
+ grid1 = ti.root.dense(ti.i, 1)
+ grid2 = ti.root.dense(ti.i, 1)
+ ptr = grid1.pointer(ti.i, 1)
+ ptr.place(x)
+ grid2.place(y)
+ '''
+ This SNode tree has node ids that do not follow DFS order:
+ S0root
+ S1dense
+ S3pointer
+ S4place
+ S2dense
+ S5place
+ '''
+ @ti.kernel
+ def foo():
+ ti.activate(ptr, [0])
+
+ foo() # Just make sure it doesn't crash
+ ti.sync()
diff --git a/tests/_python_orig/test_sparse_basics.py b/tests/_python_orig/test_sparse_basics.py
new file mode 100644
index 000000000..118f3bfbb
--- /dev/null
+++ b/tests/_python_orig/test_sparse_basics.py
@@ -0,0 +1,106 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ x[0] = 1
+ x[127] = 1
+ x[256] = 1
+
+ func()
+ assert s[None] == 256
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer_is_active():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def func():
+ for i in range(n * n):
+ s[None] += ti.is_active(x.parent().parent(), i)
+
+ x[0] = 1
+ x[127] = 1
+ x[256] = 1
+
+ func()
+ assert s[None] == 256
+
+
+def _test_pointer2():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.pointer(ti.i, n).pointer(ti.i, n).dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ x[0] = 1
+ x[127] = 1
+ x[254] = 1
+ x[256 + n * n] = 1
+
+ x[257 + n * n] = 1
+ x[257 + n * n * 2] = 1
+ x[257 + n * n * 5] = 1
+
+ func()
+ assert s[None] == 5 * n
+ print(x[257 + n * n * 7])
+ assert s[None] == 5 * n
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer2():
+ _test_pointer2()
+
+
+@test_utils.test(require=[ti.extension.sparse, ti.extension.packed],
+ packed=True)
+def test_pointer2_packed():
+ _test_pointer2()
+
+
+@pytest.mark.skip(reason='https://github.com/taichi-dev/taichi/issues/2520')
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer_direct_place():
+ x, y = ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 1
+ ti.root.pointer(ti.i, N).place(x)
+ ti.root.pointer(ti.i, N).place(y)
+
+ @ti.kernel
+ def foo():
+ pass
+
+ foo()
diff --git a/tests/_python_orig/test_sparse_deactivate.py b/tests/_python_orig/test_sparse_deactivate.py
new file mode 100644
index 000000000..e2bce91b4
--- /dev/null
+++ b/tests/_python_orig/test_sparse_deactivate.py
@@ -0,0 +1,218 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32, shape=())
+
+ n = 16
+
+ ptr = ti.root.pointer(ti.i, n)
+ ptr.dense(ti.i, n).place(x)
+
+ s[None] = 0
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ x[0] = 1
+ x[19] = 1
+ func()
+ assert s[None] == 32
+
+ @ti.kernel
+ def deactivate():
+ ti.deactivate(ptr, 0)
+
+ deactivate()
+ s[None] = 0
+ func()
+ assert s[None] == 16
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer1():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 16
+
+ ptr = ti.root.pointer(ti.i, n)
+ ptr.dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ s[None] = 0
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ x[0] = 1
+ x[19] = 1
+ x[20] = 1
+ x[45] = 1
+ func()
+ assert s[None] == 48
+
+ @ti.kernel
+ def deactivate():
+ ti.deactivate(ptr, 0)
+
+ deactivate()
+ s[None] = 0
+ func()
+ assert s[None] == 32
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer2():
+ x = ti.field(ti.f32)
+
+ n = 16
+
+ ptr = ti.root.pointer(ti.i, n)
+ ptr.dense(ti.i, n).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(n * n):
+ x[i] = 1.0
+
+ @ti.kernel
+ def set10():
+ x[10] = 10.0
+
+ @ti.kernel
+ def clear():
+ for i in ptr:
+ ti.deactivate(ptr, i)
+
+ func()
+ clear()
+
+ for i in range(n * n):
+ assert x[i] == 0.0
+
+ set10()
+
+ for i in range(n * n):
+ if i != 10:
+ assert x[i] == 0.0
+ else:
+ assert x[i] == 10.0
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer3():
+ x = ti.field(ti.f32)
+ x_temp = ti.field(ti.f32)
+
+ n = 16
+
+ ptr1 = ti.root.pointer(ti.ij, n)
+ ptr1.dense(ti.ij, n).place(x)
+ ptr2 = ti.root.pointer(ti.ij, n)
+ ptr2.dense(ti.ij, n).place(x_temp)
+
+ @ti.kernel
+ def fill():
+ for j in range(n * n):
+ for i in range(n * n):
+ x[i, j] = i + j
+
+ @ti.kernel
+ def fill2():
+ for i, j in x_temp:
+ if x_temp[i, j] < 100:
+ x[i, j] = x_temp[i, j]
+
+ @ti.kernel
+ def copy_to_temp():
+ for i, j in x:
+ x_temp[i, j] = x[i, j]
+
+ @ti.kernel
+ def copy_from_temp():
+ for i, j in x_temp:
+ x[i, j] = x_temp[i, j]
+
+ @ti.kernel
+ def clear():
+ for i, j in ptr1:
+ ti.deactivate(ptr1, [i, j])
+
+ @ti.kernel
+ def clear_temp():
+ for i, j in ptr2:
+ ti.deactivate(ptr2, [i, j])
+
+ fill()
+ copy_to_temp()
+ clear()
+ fill2()
+ clear_temp()
+
+ for itr in range(100):
+ copy_to_temp()
+ clear()
+ copy_from_temp()
+ clear_temp()
+
+ xn = x.to_numpy()
+ for j in range(n * n):
+ for i in range(n * n):
+ if i + j < 100:
+ assert xn[i, j] == i + j
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_dynamic():
+ x = ti.field(ti.i32)
+ s = ti.field(ti.i32)
+
+ n = 16
+
+ lst = ti.root.dense(ti.i, n).dynamic(ti.j, 4096)
+ lst.place(x)
+ ti.root.dense(ti.i, n).place(s)
+
+ @ti.kernel
+ def func(mul: ti.i32):
+ for i in range(n):
+ for j in range(i * i * mul):
+ ti.append(lst, i, j)
+
+ @ti.kernel
+ def fetch_length():
+ for i in range(n):
+ s[i] = ti.length(lst, i)
+
+ func(1)
+ fetch_length()
+ for i in range(n):
+ assert s[i] == i * i
+
+ @ti.kernel
+ def clear():
+ for i in range(n):
+ ti.deactivate(lst, [i])
+
+ func(2)
+ fetch_length()
+ for i in range(n):
+ assert s[i] == i * i * 3
+
+ clear()
+ fetch_length()
+ for i in range(n):
+ assert s[i] == 0
+
+ func(4)
+ fetch_length()
+ for i in range(n):
+ assert s[i] == i * i * 4
diff --git a/tests/_python_orig/test_sparse_linear_solver.py b/tests/_python_orig/test_sparse_linear_solver.py
new file mode 100644
index 000000000..a3a2a3057
--- /dev/null
+++ b/tests/_python_orig/test_sparse_linear_solver.py
@@ -0,0 +1,59 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+"""
+The symmetric positive definite matrix is created in matlab using the following script:
+ A = diag([1,2,3,4]);
+ OrthM = [1 0 1 0; -1 -2 0 1; 0 1 -1 0; 0, 1, 0 1];
+ U = orth(OrthM);
+ Aarray = U * A * U';
+ b = [1,2,3,4]';
+ res = inv(A) * b;
+"""
+Aarray = np.array([[
+ 2.73999501130921, 0.518002544441220, 0.745119303009342, 0.0508907745638859
+], [0.518002544441220, 1.45111665837647, 0.757997555750432, 0.290885785873098],
+ [
+ 0.745119303009342, 0.757997555750432, 2.96711176987733,
+ -0.518002544441220
+ ],
+ [
+ 0.0508907745638859, 0.290885785873098,
+ -0.518002544441220, 2.84177656043698
+ ]])
+
+res = np.array([
+ -0.0754984396447588, 0.469972700892492, 1.18527357933586, 1.57686870529319
+])
+
+
+@pytest.mark.parametrize("dtype", [ti.f32])
+@pytest.mark.parametrize("solver_type", ["LLT", "LDLT", "LU"])
+@pytest.mark.parametrize("ordering", ["AMD", "COLAMD"])
+@test_utils.test(arch=ti.cpu)
+def test_sparse_LLT_solver(dtype, solver_type, ordering):
+ n = 4
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ b = ti.field(ti.f32, shape=n)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(),
+ InputArray: ti.ext_arr(), b: ti.template()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += InputArray[i, j]
+ for i in range(n):
+ b[i] = i + 1
+
+ fill(Abuilder, Aarray, b)
+ A = Abuilder.build()
+ solver = ti.linalg.SparseSolver(dtype=dtype,
+ solver_type=solver_type,
+ ordering=ordering)
+ solver.analyze_pattern(A)
+ solver.factorize(A)
+ x = solver.solve(b)
+ for i in range(n):
+ assert x[i] == test_utils.approx(res[i])
diff --git a/tests/_python_orig/test_sparse_matrix.py b/tests/_python_orig/test_sparse_matrix.py
new file mode 100644
index 000000000..64576cefd
--- /dev/null
+++ b/tests/_python_orig/test_sparse_matrix.py
@@ -0,0 +1,232 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_builder_deprecated_anno():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.linalg.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ for i in range(n):
+ for j in range(n):
+ assert A[i, j] == i + j
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_builder():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ for i in range(n):
+ for j in range(n):
+ assert A[i, j] == i + j
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_shape():
+ n, m = 8, 9
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, m, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, m):
+ Abuilder[i, j] += i + j
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ assert A.shape() == (n, m)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_element_access():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i in range(n):
+ Abuilder[i, i] += i
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ for i in range(n):
+ assert A[i, i] == i
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_element_modify():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i in range(n):
+ Abuilder[i, i] += i
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ A[0, 0] = 1024.0
+ assert A[0, 0] == 1024.0
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_addition():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(),
+ Bbuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+ Bbuilder[i, j] += i - j
+
+ fill(Abuilder, Bbuilder)
+ A = Abuilder.build()
+ B = Bbuilder.build()
+ C = A + B
+ for i in range(n):
+ for j in range(n):
+ assert C[i, j] == 2 * i
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_subtraction():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(),
+ Bbuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+ Bbuilder[i, j] += i - j
+
+ fill(Abuilder, Bbuilder)
+ A = Abuilder.build()
+ B = Bbuilder.build()
+ C = A - B
+ for i in range(n):
+ for j in range(n):
+ assert C[i, j] == 2 * j
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_scalar_multiplication():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ B = A * 3.0
+ for i in range(n):
+ for j in range(n):
+ assert B[i, j] == 3 * (i + j)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_transpose():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+
+ fill(Abuilder)
+ A = Abuilder.build()
+ B = A.transpose()
+ for i in range(n):
+ for j in range(n):
+ assert B[i, j] == A[j, i]
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_elementwise_multiplication():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(),
+ Bbuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+ Bbuilder[i, j] += i - j
+
+ fill(Abuilder, Bbuilder)
+ A = Abuilder.build()
+ B = Bbuilder.build()
+ C = A * B
+ for i in range(n):
+ for j in range(n):
+ assert C[i, j] == (i + j) * (i - j)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_multiplication():
+ n = 2
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(),
+ Bbuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+ Bbuilder[i, j] += i - j
+
+ fill(Abuilder, Bbuilder)
+ A = Abuilder.build()
+ B = Bbuilder.build()
+ C = A @ B
+ assert C[0, 0] == 1.0
+ assert C[0, 1] == 0.0
+ assert C[1, 0] == 2.0
+ assert C[1, 1] == -1.0
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_nonsymmetric_multiplication():
+ n, k, m = 2, 3, 4
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, k, max_num_triplets=100)
+ Bbuilder = ti.linalg.SparseMatrixBuilder(k, m, max_num_triplets=100)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(),
+ Bbuilder: ti.types.sparse_matrix_builder()):
+ for i, j in ti.ndrange(n, k):
+ Abuilder[i, j] += i + j
+ for i, j in ti.ndrange(k, m):
+ Bbuilder[i, j] -= i + j
+
+ fill(Abuilder, Bbuilder)
+ A = Abuilder.build()
+ B = Bbuilder.build()
+ C = A @ B
+ GT = [[-5, -8, -11, -14], [-8, -14, -20, -26]]
+ for i in range(n):
+ for j in range(m):
+ assert C[i, j] == GT[i][j]
diff --git a/tests/_python_orig/test_sparse_multi_tree.py b/tests/_python_orig/test_sparse_multi_tree.py
new file mode 100644
index 000000000..89ebe6a3c
--- /dev/null
+++ b/tests/_python_orig/test_sparse_multi_tree.py
@@ -0,0 +1,29 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda])
+def test_pointer():
+ e = ti.Vector.field(2, dtype=int, shape=16)
+
+ e[0] = ti.Vector([0, 0])
+
+ a = ti.field(float, shape=512)
+ b = ti.field(dtype=float)
+ ti.root.pointer(ti.i, 32).dense(ti.i, 16).place(b)
+
+ @ti.kernel
+ def test():
+ for i in a:
+ a[i] = i
+ for i in a:
+ b[i] += a[i]
+
+ test()
+ ti.sync()
+
+ b_np = b.to_numpy()
+ for i in range(512):
+ assert (b_np[i] == i)
diff --git a/tests/_python_orig/test_sparse_parallel.py b/tests/_python_orig/test_sparse_parallel.py
new file mode 100644
index 000000000..1a797c15c
--- /dev/null
+++ b/tests/_python_orig/test_sparse_parallel.py
@@ -0,0 +1,79 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def activate():
+ for i in range(n):
+ x[i * n] = 0
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += 1
+
+ activate()
+ func()
+ assert s[None] == n * n
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_pointer2():
+ x = ti.field(ti.f32)
+ s = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
+ ti.root.place(s)
+
+ @ti.kernel
+ def activate():
+ for i in range(n * n):
+ x[i] = i
+
+ @ti.kernel
+ def func():
+ for i in x:
+ s[None] += i
+
+ activate()
+ func()
+ N = n * n
+ assert s[None] == N * (N - 1) / 2
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_nested_struct_fill_and_clear():
+ a = ti.field(dtype=ti.f32)
+ N = 512
+
+ ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
+
+ @ti.kernel
+ def fill():
+ for i, j in ti.ndrange(N * 8, N * 8):
+ a[i, j] = 2.0
+
+ @ti.kernel
+ def clear():
+ for i, j in a.parent():
+ ti.deactivate(a.parent().parent(), [i, j])
+
+ def task():
+ fill()
+ clear()
+
+ for i in range(10):
+ task()
+ ti.sync()
diff --git a/tests/_python_orig/test_spmv.py b/tests/_python_orig/test_spmv.py
new file mode 100644
index 000000000..ee0c78b4f
--- /dev/null
+++ b/tests/_python_orig/test_spmv.py
@@ -0,0 +1,71 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_vector_multiplication1():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ b = ti.field(ti.f32, shape=n)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(), b: ti.template()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i
+
+ for i in range(n):
+ b[i] = 1.0
+
+ fill(Abuilder, b)
+ A = Abuilder.build()
+ x = A @ b
+ for i in range(n):
+ assert x[i] == 8 * i
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_vector_multiplication2():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ b = ti.field(ti.f32, shape=n)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(), b: ti.template()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i - j
+
+ for i in range(n):
+ b[i] = 1.0
+
+ fill(Abuilder, b)
+ A = Abuilder.build()
+
+ x = A @ b
+ import numpy as np
+ res = np.array([-28, -20, -12, -4, 4, 12, 20, 28])
+ for i in range(n):
+ assert x[i] == res[i]
+
+
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_vector_multiplication3():
+ n = 8
+ Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100)
+ b = ti.field(ti.f32, shape=n)
+
+ @ti.kernel
+ def fill(Abuilder: ti.types.sparse_matrix_builder(), b: ti.template()):
+ for i, j in ti.ndrange(n, n):
+ Abuilder[i, j] += i + j
+
+ for i in range(n):
+ b[i] = 1.0
+
+ fill(Abuilder, b)
+ A = Abuilder.build()
+
+ x = A @ b
+ import numpy as np
+ res = np.array([28, 36, 44, 52, 60, 68, 76, 84])
+ for i in range(n):
+ assert x[i] == res[i]
diff --git a/tests/_python_orig/test_ssa.py b/tests/_python_orig/test_ssa.py
new file mode 100644
index 000000000..585af0439
--- /dev/null
+++ b/tests/_python_orig/test_ssa.py
@@ -0,0 +1,75 @@
+'''
+SSA violation edge-case regression test.
+1. Ensure working well when computation result is assigned to self.
+2. Prevent duplicate-evaluation on expression with side-effect like random.
+'''
+import math
+
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_matrix_self_assign():
+ a = ti.Vector.field(2, ti.f32, ())
+ b = ti.Matrix.field(2, 2, ti.f32, ())
+ c = ti.Vector.field(2, ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None] = a[None].normalized()
+ b[None] = b[None].transpose()
+ c[None] = ti.Vector([c[None][1], c[None][0]])
+
+ inv_sqrt2 = 1 / math.sqrt(2)
+
+ a[None] = [1, 1]
+ b[None] = [[1, 2], [3, 4]]
+ c[None] = [2, 3]
+ func()
+ assert a[None] == ti.Vector([inv_sqrt2, inv_sqrt2])
+ assert b[None] == ti.Matrix([[1, 3], [2, 4]])
+ assert c[None] == ti.Vector([3, 2])
+
+
+@test_utils.test()
+def test_random_vector_dup_eval():
+ a = ti.Vector.field(2, ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None] = ti.Vector([ti.random(), 1]).normalized()
+
+ for i in range(4):
+ func()
+ assert a[None].norm_sqr() == test_utils.approx(1)
+
+
+@test_utils.test()
+def test_func_argument_dup_eval():
+ @ti.func
+ def func(a, t):
+ return a * t - a
+
+ @ti.kernel
+ def kern(t: ti.f32) -> ti.f32:
+ return func(ti.random(), t)
+
+ for i in range(4):
+ assert kern(1.0) == 0.0
+
+
+@test_utils.test()
+def test_func_random_argument_dup_eval():
+ @ti.func
+ def func(a):
+ return ti.Vector([ti.cos(a), ti.sin(a)])
+
+ @ti.kernel
+ def kern() -> ti.f32:
+ return func(ti.random()).norm_sqr()
+
+ for i in range(4):
+ assert kern() == test_utils.approx(1.0, rel=5e-5)
diff --git a/tests/_python_orig/test_static.py b/tests/_python_orig/test_static.py
new file mode 100644
index 000000000..767cf31ab
--- /dev/null
+++ b/tests/_python_orig/test_static.py
@@ -0,0 +1,90 @@
+import numpy as np
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@pytest.mark.parametrize('val', [0, 1])
+@test_utils.test(ti.cpu)
+def test_static_if(val):
+ x = ti.field(ti.i32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def static():
+ if ti.static(val > 0.5):
+ x[0] = 1
+ else:
+ x[0] = 0
+
+ static()
+ assert x[0] == val
+
+
+@test_utils.test(ti.cpu)
+def test_static_if_error():
+ x = ti.field(ti.i32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def static(val: float):
+ if ti.static(val > 0.5):
+ x[0] = 1
+ else:
+ x[0] = 0
+
+ with pytest.raises(ti.TaichiCompilationError,
+ match='must be compile-time constants'):
+ static(42)
+
+
+@test_utils.test()
+def test_static_ndrange():
+ n = 3
+ x = ti.Matrix.field(n, n, dtype=ti.f32, shape=(n, n))
+
+ @ti.kernel
+ def fill():
+ w = [0, 1, 2]
+ for i, j in ti.static(ti.ndrange(3, 3)):
+ x[i, j][i, j] = w[i] + w[j] * 2
+
+ fill()
+ for i in range(3):
+ for j in range(3):
+ assert x[i, j][i, j] == i + j * 2
+
+
+@test_utils.test(ti.cpu)
+def test_static_break():
+ x = ti.field(ti.i32, 5)
+
+ @ti.kernel
+ def func():
+ for i in ti.static(range(5)):
+ x[i] = 1
+ if ti.static(i == 2):
+ break
+
+ func()
+
+ assert np.allclose(x.to_numpy(), np.array([1, 1, 1, 0, 0]))
+
+
+@test_utils.test(ti.cpu)
+def test_static_continue():
+ x = ti.field(ti.i32, 5)
+
+ @ti.kernel
+ def func():
+ for i in ti.static(range(5)):
+ if ti.static(i == 2):
+ continue
+ x[i] = 1
+
+ func()
+
+ assert np.allclose(x.to_numpy(), np.array([1, 1, 0, 1, 1]))
diff --git a/tests/_python_orig/test_stencils.py b/tests/_python_orig/test_stencils.py
new file mode 100644
index 000000000..3f34a6d5c
--- /dev/null
+++ b/tests/_python_orig/test_stencils.py
@@ -0,0 +1,25 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_simple():
+ # Note: access simplification does not work in this case. Maybe worth fixing.
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x, y)
+
+ @ti.kernel
+ def run():
+ for i in range(n - 1):
+ x[i] = 1
+ y[i + 1] = 2
+
+ run()
+
+ for i in range(n - 1):
+ assert x[i] == 1
+ assert y[i + 1] == 2
diff --git a/tests/_python_orig/test_stop_grad.py b/tests/_python_orig/test_stop_grad.py
new file mode 100644
index 000000000..450782b37
--- /dev/null
+++ b/tests/_python_orig/test_stop_grad.py
@@ -0,0 +1,86 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_normal_grad():
+ x = ti.field(ti.f32)
+ loss = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(loss)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ loss[None] += x[i]**2
+
+ for i in range(n):
+ x[i] = i
+
+ with ti.Tape(loss):
+ func()
+
+ for i in range(n):
+ assert x.grad[i] == i * 2
+
+
+@test_utils.test()
+def test_stop_grad():
+ x = ti.field(ti.f32)
+ loss = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(loss)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+ for i in range(n):
+ ti.stop_grad(x)
+ loss[None] += x[i]**2
+
+ for i in range(n):
+ x[i] = i
+
+ with ti.Tape(loss):
+ func()
+
+ for i in range(n):
+ assert x.grad[i] == 0
+
+
+@test_utils.test()
+def test_stop_grad2():
+ x = ti.field(ti.f32)
+ loss = ti.field(ti.f32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.place(loss)
+ ti.root.lazy_grad()
+
+ @ti.kernel
+ def func():
+        # Two loops, one with stop_grad and one without
+ for i in range(n):
+ ti.stop_grad(x)
+ loss[None] += x[i]**2
+ for i in range(n):
+ loss[None] += x[i]**2
+
+ for i in range(n):
+ x[i] = i
+
+ with ti.Tape(loss):
+ func()
+
+    # If without stop_grad, x.grad[i] = i * 4
+ for i in range(n):
+ assert x.grad[i] == i * 2
diff --git a/tests/_python_orig/test_struct.py b/tests/_python_orig/test_struct.py
new file mode 100644
index 000000000..4a1343045
--- /dev/null
+++ b/tests/_python_orig/test_struct.py
@@ -0,0 +1,80 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_linear():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.dense(ti.i, n).place(y)
+
+ for i in range(n):
+ x[i] = i
+ y[i] = i + 123
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i + 123
+
+
+def test_linear_repeated():
+ for i in range(10):
+ test_linear()
+
+
+@test_utils.test()
+def test_linear_nested():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n // 16).dense(ti.i, 16).place(x)
+ ti.root.dense(ti.i, n // 16).dense(ti.i, 16).place(y)
+
+ for i in range(n):
+ x[i] = i
+ y[i] = i + 123
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i + 123
+
+
+@test_utils.test()
+def test_linear_nested_aos():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n // 16).dense(ti.i, 16).place(x, y)
+
+ for i in range(n):
+ x[i] = i
+ y[i] = i + 123
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i + 123
+
+
+@test_utils.test(exclude=[ti.vulkan])
+def test_2d_nested():
+ x = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.ij, n // 16).dense(ti.ij, (32, 16)).place(x)
+
+ for i in range(n * 2):
+ for j in range(n):
+ x[i, j] = i + j * 10
+
+ for i in range(n * 2):
+ for j in range(n):
+ assert x[i, j] == i + j * 10
diff --git a/tests/_python_orig/test_struct_for.py b/tests/_python_orig/test_struct_for.py
new file mode 100644
index 000000000..95ce85ab2
--- /dev/null
+++ b/tests/_python_orig/test_struct_for.py
@@ -0,0 +1,315 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_singleton():
+ x = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def fill():
+ for I in ti.grouped(x):
+ x[I] = 3
+
+ fill()
+
+ assert x[None] == 3
+
+
+@test_utils.test()
+def test_singleton2():
+ x = ti.field(ti.i32)
+
+ ti.root.place(x)
+
+ @ti.kernel
+ def fill():
+ for I in ti.grouped(x):
+ x[I] = 3
+
+ fill()
+
+ assert x[None] == 3
+
+
+@test_utils.test()
+def test_linear():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n).place(x)
+ ti.root.dense(ti.i, n).place(y)
+
+ @ti.kernel
+ def fill():
+ for i in x:
+ x[i] = i
+ y[i] = i * 2
+
+ fill()
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i * 2
+
+
+@test_utils.test()
+def test_nested():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.i, n // 4).dense(ti.i, 4).place(x)
+ ti.root.dense(ti.i, n).place(y)
+
+ @ti.kernel
+ def fill():
+ for i in x:
+ x[i] = i
+ y[i] = i * 2
+
+ fill()
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i * 2
+
+
+@test_utils.test()
+def test_nested2():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 2048
+
+ ti.root.dense(ti.i, n // 512).dense(ti.i, 16).dense(ti.i,
+ 8).dense(ti.i,
+ 4).place(x)
+ ti.root.dense(ti.i, n).place(y)
+
+ @ti.kernel
+ def fill():
+ for i in x:
+ x[i] = i
+ y[i] = i * 2
+
+ fill()
+
+ for i in range(n):
+ assert x[i] == i
+ assert y[i] == i * 2
+
+
+@test_utils.test()
+def test_2d():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n, m = 32, 16
+
+ ti.root.dense(ti.ij, n).place(x, y)
+
+ @ti.kernel
+ def fill():
+ for i, j in x:
+ x[i, j] = i + j * 2
+
+ fill()
+
+ for i in range(n):
+ for j in range(m):
+ assert x[i, j] == i + j * 2
+
+
+@test_utils.test()
+def test_2d_non_POT():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32, shape=())
+
+ n, m = 13, 17
+
+ ti.root.dense(ti.ij, (n, m)).place(x)
+
+ @ti.kernel
+ def fill():
+ for i, j in x:
+ y[None] += i + j * j
+
+ fill()
+
+ tot = 0
+ for i in range(n):
+ for j in range(m):
+ tot += i + j * j
+ assert y[None] == tot
+
+
+@test_utils.test()
+def test_nested_2d():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 32
+
+ ti.root.dense(ti.ij, n // 4).dense(ti.ij, 4).place(x, y)
+
+ @ti.kernel
+ def fill():
+ for i, j in x:
+ x[i, j] = i + j * 2
+
+ fill()
+
+ for i in range(n):
+ for j in range(n):
+ assert x[i, j] == i + j * 2
+
+
+@test_utils.test()
+def test_nested_2d_more_nests():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32)
+
+ n = 64
+
+ ti.root.dense(ti.ij, n // 16).dense(ti.ij,
+ 2).dense(ti.ij,
+ 4).dense(ti.ij,
+ 2).place(x, y)
+
+ @ti.kernel
+ def fill():
+ for i, j in x:
+ x[i, j] = i + j * 2
+
+ fill()
+
+ for i in range(n):
+ for j in range(n):
+ assert x[i, j] == i + j * 2
+
+
+@test_utils.test()
+def test_linear_k():
+ x = ti.field(ti.i32)
+
+ n = 128
+
+ ti.root.dense(ti.k, n).place(x)
+
+ @ti.kernel
+ def fill():
+ for i in x:
+ x[i] = i
+
+ fill()
+
+ for i in range(n):
+ assert x[i] == i
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_struct_for_branching():
+ # Related issue: https://github.com/taichi-dev/taichi/issues/704
+ x = ti.field(dtype=ti.i32)
+ y = ti.field(dtype=ti.i32)
+ ti.root.pointer(ti.ij, 128 // 4).dense(ti.ij, 4).place(x, y)
+
+ @ti.kernel
+ def func1():
+ for i, j in x:
+ if x[i, j] & 2 == 2:
+ y[i, j] = 1
+
+ @ti.kernel
+ def func2():
+ for i, j in x:
+ if x[i, j] == 2 or x[i, j] == 4:
+ y[i, j] = 1
+
+ @ti.kernel
+ def func3():
+ for i, j in x:
+ if x[i, j] & 2 == 2 or x[i, j] & 4 == 4:
+ y[i, j] = 1
+
+ func1()
+ func2()
+ func3()
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_struct_for_pointer_block():
+ n = 16
+ block_size = 8
+
+ f = ti.field(dtype=ti.f32)
+
+ block = ti.root.pointer(ti.ijk, n // block_size)
+ block.dense(ti.ijk, block_size).place(f)
+
+ f[0, 2, 3] = 1
+
+ @ti.kernel
+ def count() -> int:
+ tot = 0
+ for I in ti.grouped(block):
+ tot += 1
+ return tot
+
+ assert count() == 1
+
+
+@test_utils.test(require=ti.extension.quant)
+def test_struct_for_quant():
+ n = 8
+
+ ci13 = ti.types.quantized_types.quant.int(13, True)
+ x = ti.field(dtype=ci13)
+
+ ti.root.dense(ti.i, n).bit_struct(num_bits=32).place(x)
+
+ @ti.kernel
+ def count() -> int:
+ tot = 0
+ for i in x:
+ tot += i
+ return tot
+
+ assert count() == 28
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_struct_for_continue():
+ # Related issue: https://github.com/taichi-dev/taichi/issues/3272
+ x = ti.field(dtype=ti.i32)
+ n = 4
+ ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
+
+ @ti.kernel
+ def init():
+ for i in range(n):
+ x[i * n + i] = 1
+
+ @ti.kernel
+ def struct_for_continue() -> ti.i32:
+ cnt = 0
+ for i in x:
+ if x[i]: continue
+ cnt += 1
+ return cnt
+
+ @ti.kernel
+ def range_for_continue() -> ti.i32:
+ cnt = 0
+ for i in range(n * n):
+ if x[i]: continue
+ cnt += 1
+ return cnt
+
+ init()
+ assert struct_for_continue() == n * (n - 1)
+ assert range_for_continue() == n * (n - 1)
diff --git a/tests/_python_orig/test_struct_for_dynamic.py b/tests/_python_orig/test_struct_for_dynamic.py
new file mode 100644
index 000000000..67595a6e0
--- /dev/null
+++ b/tests/_python_orig/test_struct_for_dynamic.py
@@ -0,0 +1,44 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(exclude=[ti.opengl, ti.cc, ti.vulkan])
+def test_dynamic():
+ x = ti.field(ti.i32)
+ y = ti.field(ti.i32, shape=())
+
+ n = 128
+
+ ti.root.dynamic(ti.i, n).place(x)
+
+ @ti.kernel
+ def count():
+ for i in x:
+ y[None] += 1
+
+ x[n // 3] = 1
+
+ count()
+
+ assert y[None] == n // 3 + 1
+
+
+@test_utils.test(exclude=[ti.opengl, ti.cc, ti.vulkan])
+def test_dense_dynamic():
+ n = 128
+
+ x = ti.field(ti.i32)
+
+ ti.root.dense(ti.i, n).dynamic(ti.j, n, 128).place(x)
+
+ @ti.kernel
+ def append():
+ for i in range(n):
+ for j in range(i):
+ ti.append(x.parent(), i, j * 2)
+
+ append()
+
+ for i in range(n):
+ for j in range(i):
+ assert x[i, j] == j * 2
diff --git a/tests/_python_orig/test_struct_for_intermediate.py b/tests/_python_orig/test_struct_for_intermediate.py
new file mode 100644
index 000000000..4971f20f1
--- /dev/null
+++ b/tests/_python_orig/test_struct_for_intermediate.py
@@ -0,0 +1,47 @@
+import taichi as ti
+from tests import test_utils
+
+
+def _test_nested():
+ x = ti.field(ti.i32)
+
+ p, q = 3, 7
+ n, m = 2, 4
+
+ ti.root.dense(ti.ij, (p, q)).dense(ti.ij, (n, m)).place(x)
+
+ @ti.kernel
+ def iterate():
+ for i, j in x.parent():
+ x[i, j] += 1
+
+ iterate()
+ for i in range(p):
+ for j in range(q):
+ assert x[i * n, j * m] == 1, (i, j)
+
+
+@test_utils.test(require=ti.extension.sparse,
+ demote_dense_struct_fors=False,
+ packed=False)
+def test_nested():
+ _test_nested()
+
+
+@test_utils.test(demote_dense_struct_fors=True, packed=False)
+def test_nested_demote():
+ _test_nested()
+
+
+@test_utils.test(require=[ti.extension.sparse, ti.extension.packed],
+ demote_dense_struct_fors=False,
+ packed=True)
+def test_nested_packed():
+ _test_nested()
+
+
+@test_utils.test(require=ti.extension.packed,
+ demote_dense_struct_fors=True,
+ packed=True)
+def test_nested_demote_packed():
+ _test_nested()
diff --git a/tests/_python_orig/test_struct_for_non_pot.py b/tests/_python_orig/test_struct_for_non_pot.py
new file mode 100644
index 000000000..c8eefbe3d
--- /dev/null
+++ b/tests/_python_orig/test_struct_for_non_pot.py
@@ -0,0 +1,68 @@
+import taichi as ti
+from tests import test_utils
+
+
+def _test_1d():
+ x = ti.field(ti.i32)
+ sum = ti.field(ti.i32)
+
+ n = 100
+
+ ti.root.dense(ti.k, n).place(x)
+ ti.root.place(sum)
+
+ @ti.kernel
+ def accumulate():
+ for i in x:
+ ti.atomic_add(sum[None], i)
+
+ accumulate()
+
+ for i in range(n):
+ assert sum[None] == 4950
+
+
+@test_utils.test()
+def test_1d():
+ _test_1d()
+
+
+@test_utils.test(require=ti.extension.packed, packed=True)
+def test_1d_packed():
+ _test_1d()
+
+
+def _test_2d():
+ x = ti.field(ti.i32)
+ sum = ti.field(ti.i32)
+
+ n = 100
+ m = 19
+
+ ti.root.dense(ti.k, n).dense(ti.i, m).place(x)
+ ti.root.place(sum)
+
+ @ti.kernel
+ def accumulate():
+ for i, k in x:
+ ti.atomic_add(sum[None], i + k * 2)
+
+ gt = 0
+ for k in range(n):
+ for i in range(m):
+ gt += i + k * 2
+
+ accumulate()
+
+ for i in range(n):
+ assert sum[None] == gt
+
+
+@test_utils.test()
+def test_2d():
+ _test_2d()
+
+
+@test_utils.test(require=ti.extension.packed, packed=True)
+def test_2d_packed():
+ _test_2d()
diff --git a/tests/_python_orig/test_svd.py b/tests/_python_orig/test_svd.py
new file mode 100644
index 000000000..988dc764a
--- /dev/null
+++ b/tests/_python_orig/test_svd.py
@@ -0,0 +1,90 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.data64, fast_math=False)
+def test_precision():
+ u = ti.field(ti.f64, shape=())
+ v = ti.field(ti.f64, shape=())
+ w = ti.field(ti.f64, shape=())
+
+ @ti.kernel
+ def forward():
+ v[None] = ti.sqrt(ti.cast(u[None] + 3.25, ti.f64))
+ w[None] = ti.cast(u[None] + 7, ti.f64) / ti.cast(u[None] + 3, ti.f64)
+
+ forward()
+ assert v[None]**2 == test_utils.approx(3.25, abs=1e-12)
+ assert w[None] * 3 == test_utils.approx(7, abs=1e-12)
+
+
+def mat_equal(A, B, tol=1e-6):
+ return np.max(np.abs(A - B)) < tol
+
+
+def _test_svd(dt, n):
+ print(
+ f'arch={ti.lang.impl.current_cfg().arch} default_fp={ti.lang.impl.current_cfg().default_fp} fast_math={ti.lang.impl.current_cfg().fast_math} dim={n}'
+ )
+ A = ti.Matrix.field(n, n, dtype=dt, shape=())
+ A_reconstructed = ti.Matrix.field(n, n, dtype=dt, shape=())
+ U = ti.Matrix.field(n, n, dtype=dt, shape=())
+ UtU = ti.Matrix.field(n, n, dtype=dt, shape=())
+ sigma = ti.Matrix.field(n, n, dtype=dt, shape=())
+ V = ti.Matrix.field(n, n, dtype=dt, shape=())
+ VtV = ti.Matrix.field(n, n, dtype=dt, shape=())
+
+ @ti.kernel
+ def run():
+ U[None], sigma[None], V[None] = ti.svd(A[None], dt)
+ UtU[None] = U[None].transpose() @ U[None]
+ VtV[None] = V[None].transpose() @ V[None]
+ A_reconstructed[None] = U[None] @ sigma[None] @ V[None].transpose()
+
+ if n == 3:
+ A[None] = [[1, 1, 3], [9, -3, 2], [-3, 4, 2]]
+ else:
+ A[None] = [[1, 1], [2, 3]]
+
+ run()
+
+ tol = 1e-5 if dt == ti.f32 else 1e-12
+
+ assert mat_equal(UtU.to_numpy(), np.eye(n), tol=tol)
+ assert mat_equal(VtV.to_numpy(), np.eye(n), tol=tol)
+ assert mat_equal(A_reconstructed.to_numpy(), A.to_numpy(), tol=tol)
+ for i in range(n):
+ for j in range(n):
+ if i != j:
+ assert sigma[None][i, j] == test_utils.approx(0)
+
+
+def test_svd():
+ for fp in [ti.f32, ti.f64]:
+ for d in [2, 3]:
+
+ @test_utils.test(
+ require=ti.extension.data64 if fp == ti.f64 else [],
+ default_fp=fp,
+ fast_math=False)
+ def wrapped():
+ _test_svd(fp, d)
+
+ wrapped()
+
+
+@test_utils.test()
+def test_transpose_no_loop():
+ A = ti.Matrix.field(3, 3, dtype=ti.f32, shape=())
+ U = ti.Matrix.field(3, 3, dtype=ti.f32, shape=())
+ sigma = ti.Matrix.field(3, 3, dtype=ti.f32, shape=())
+ V = ti.Matrix.field(3, 3, dtype=ti.f32, shape=())
+
+ @ti.kernel
+ def run():
+ U[None], sigma[None], V[None] = ti.svd(A[None])
+
+ run()
+ # As long as it passes compilation we are good
diff --git a/tests/_python_orig/test_sync.py b/tests/_python_orig/test_sync.py
new file mode 100644
index 000000000..feec84008
--- /dev/null
+++ b/tests/_python_orig/test_sync.py
@@ -0,0 +1,27 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_kernel_sync():
+ n = 128
+ x = ti.field(ti.i32, shape=(3, ))
+ y = ti.field(ti.i32, shape=(n, ))
+ # These [] calls are all on CPU, so no synchronization needed
+ x[0] = 42
+ assert x[0] == 42
+ x[1] = 233
+ x[2] = -1
+
+ @ti.kernel
+ def func():
+ for i in y:
+ y[i] = x[i % 3]
+
+ # Kernel *may* run on GPU
+ # Note that the previous kernel is a write, which didn't do a sync. But that
+ # should be fine -- we only need to sync the memory after GPU -> CPU.
+ func()
+ # These [] calls are on CPU. They should be smart enough to sync only once.
+ for i in range(n):
+ assert y[i] == x[i % 3]
diff --git a/tests/_python_orig/test_syntax_errors.py b/tests/_python_orig/test_syntax_errors.py
new file mode 100644
index 000000000..6ecc89809
--- /dev/null
+++ b/tests/_python_orig/test_syntax_errors.py
@@ -0,0 +1,330 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_try():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ try:
+ a = 0
+ except:
+ a = 1
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_for_else():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ for i in range(10):
+ pass
+ else:
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_while_else():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ while True:
+ pass
+ else:
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_raise():
+ @ti.kernel
+ def foo():
+ raise Exception()
+
+ with pytest.raises(ti.TaichiSyntaxError,
+ match='Unsupported node "Raise"') as e:
+ foo()
+
+
+@test_utils.test()
+def test_loop_var_range():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ i = 0
+ for i in range(10):
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_loop_var_struct():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ i = 0
+ for i in x:
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_loop_var_struct():
+ x = ti.field(ti.f32)
+
+ ti.root.dense(ti.i, 1).place(x)
+
+ @ti.kernel
+ def func():
+ j = 0
+ for i, j in x:
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_func_def_in_kernel():
+ @ti.kernel
+ def kernel():
+ @ti.func
+ def func():
+ return 1
+
+ print(func())
+
+ with pytest.raises(ti.TaichiCompilationError):
+ kernel()
+
+
+@test_utils.test()
+def test_func_def_in_func():
+ @ti.func
+ def func():
+ @ti.func
+ def func2():
+ return 1
+
+ return func2()
+
+ @ti.kernel
+ def kernel():
+ print(func())
+
+ with pytest.raises(ti.TaichiCompilationError):
+ kernel()
+
+
+@test_utils.test(arch=ti.cpu)
+def test_kernel_bad_argument_annotation():
+ with pytest.raises(ti.TaichiSyntaxError, match='annotation'):
+
+ @ti.kernel
+ def kernel(x: 'bar'):
+ print(x)
+
+
+@test_utils.test(arch=ti.cpu)
+def test_func_bad_argument_annotation():
+ with pytest.raises(ti.TaichiSyntaxError, match='annotation'):
+
+ @ti.func
+ def func(x: 'foo'):
+ print(x)
+
+
+@test_utils.test()
+def test_nested_static():
+ @ti.kernel
+ def func():
+ for i in ti.static(ti.static(range(1))):
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_nested_grouped():
+ @ti.kernel
+ def func():
+ for i in ti.grouped(ti.grouped(range(1))):
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_nested_ndrange():
+ @ti.kernel
+ def func():
+ for i in ti.ndrange(ti.ndrange(1)):
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_static_grouped_struct_for():
+ val = ti.field(ti.i32)
+
+ ti.root.dense(ti.ij, (1, 1)).place(val)
+
+ @ti.kernel
+ def test():
+ for I in ti.static(ti.grouped(val)):
+ pass
+
+ with pytest.raises(ti.TaichiCompilationError):
+ test()
+
+
+@test_utils.test()
+def test_is():
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = b is c
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_is_not():
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = b is not c
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_in():
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = b in c
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_not_in():
+ b = ti.field(ti.i32, shape=())
+ c = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ a = b not in c
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_expr_set():
+ @ti.kernel
+ def func():
+ x = {2, 4, 6}
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test()
+def test_redefining_template_args():
+ @ti.kernel
+ def foo(a: ti.template()):
+ a = 5
+
+ with pytest.raises(
+ ti.TaichiSyntaxError,
+ match=
+ "Variable 'a' cannot be assigned. Maybe it is not a Taichi object?"
+ ):
+ foo(1)
+
+
+@test_utils.test()
+def test_break_in_outermost_for():
+ @ti.kernel
+ def foo():
+ for i in range(10):
+ break
+
+ with pytest.raises(ti.TaichiSyntaxError,
+ match="Cannot break in the outermost loop"):
+ foo()
+
+
+@test_utils.test()
+def test_funcdef_in_kernel():
+ @ti.kernel
+ def foo():
+ def bar():
+ pass
+
+ with pytest.raises(
+ ti.TaichiSyntaxError,
+ match="Function definition is not allowed in 'ti.kernel'"):
+ foo()
+
+
+@test_utils.test()
+def test_funcdef_in_func():
+ @ti.func
+ def foo():
+ def bar():
+ pass
+
+ @ti.kernel
+ def baz():
+ foo()
+
+ with pytest.raises(
+ ti.TaichiSyntaxError,
+ match="Function definition is not allowed in 'ti.func'"):
+ baz()
diff --git a/tests/_python_orig/test_tensor_dimensionality.py b/tests/_python_orig/test_tensor_dimensionality.py
new file mode 100644
index 000000000..5eb079a09
--- /dev/null
+++ b/tests/_python_orig/test_tensor_dimensionality.py
@@ -0,0 +1,32 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def _test_dimensionality(d):
+ x = ti.Vector.field(2, dtype=ti.i32, shape=(2, ) * d)
+
+ @ti.kernel
+ def fill():
+ for I in ti.grouped(x):
+ x[I] += ti.Vector([I.sum(), I[0]])
+
+ for i in range(2**d):
+ indices = []
+ for j in range(d):
+ indices.append(i // (2**j) % 2)
+ x.__getitem__(tuple(indices))[0] = sum(indices) * 2
+ fill()
+ # FIXME(yuanming-hu): snode_writer needs 9 arguments actually..
+ if ti.lang.impl.current_cfg().arch == ti.cc and d >= 8:
+ return
+ for i in range(2**d):
+ indices = []
+ for j in range(d):
+ indices.append(i // (2**j) % 2)
+ assert x.__getitem__(tuple(indices))[0] == sum(indices) * 3
+
+
+def test_dimensionality():
+ for i in range(2, ti._lib.core.get_max_num_indices() + 1):
+ _test_dimensionality(i)
diff --git a/tests/_python_orig/test_tensor_reflection.py b/tests/_python_orig/test_tensor_reflection.py
new file mode 100644
index 000000000..ae662418b
--- /dev/null
+++ b/tests/_python_orig/test_tensor_reflection.py
@@ -0,0 +1,115 @@
+import pytest
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_POT():
+ val = ti.field(ti.i32)
+
+ n = 4
+ m = 8
+ p = 16
+
+ ti.root.dense(ti.i, n).dense(ti.j, m).dense(ti.k, p).place(val)
+
+ assert val.shape == (n, m, p)
+ assert val.dtype == ti.i32
+
+
+@test_utils.test()
+def test_non_POT():
+ val = ti.field(ti.i32)
+
+ n = 3
+ m = 7
+ p = 11
+
+ blk1 = ti.root.dense(ti.i, n)
+ blk2 = blk1.dense(ti.j, m)
+ blk3 = blk2.dense(ti.k, p)
+ blk3.place(val)
+
+ assert val.shape == (n, m, p)
+ assert val.dtype == ti.i32
+
+
+@test_utils.test()
+def test_unordered():
+ val = ti.field(ti.i32)
+
+ n = 3
+ m = 7
+ p = 11
+
+ blk1 = ti.root.dense(ti.k, n)
+ blk2 = blk1.dense(ti.i, m)
+ blk3 = blk2.dense(ti.j, p)
+ blk3.place(val)
+
+ assert val.dtype == ti.i32
+ assert val.shape == (m, p, n)
+ assert val.snode.parent(0) == val.snode
+ assert val.snode.parent() == blk3
+ assert val.snode.parent(1) == blk3
+ assert val.snode.parent(2) == blk2
+ assert val.snode.parent(3) == blk1
+ assert val.snode.parent(4) == ti.root
+
+ assert val.snode in blk3._get_children()
+ assert blk3 in blk2._get_children()
+ assert blk2 in blk1._get_children()
+ impl.get_runtime().materialize_root_fb(False)
+ assert blk1 in ti.FieldsBuilder._finalized_roots()[0]._get_children()
+
+ expected_str = f'ti.root => dense {[n]} => dense {[m, n]}' \
+ f' => dense {[m, p, n]} => place {[m, p, n]}'
+ assert str(val.snode) == expected_str
+
+
+@test_utils.test()
+def test_unordered_matrix():
+ val = ti.Matrix.field(3, 2, ti.i32)
+
+ n = 3
+ m = 7
+ p = 11
+
+ blk1 = ti.root.dense(ti.k, n)
+ blk2 = blk1.dense(ti.i, m)
+ blk3 = blk2.dense(ti.j, p)
+ blk3.place(val)
+
+ assert val.shape == (m, p, n)
+ assert val.dtype == ti.i32
+ assert val.snode.parent(0) == val.snode
+ assert val.snode.parent() == blk3
+ assert val.snode.parent(1) == blk3
+ assert val.snode.parent(2) == blk2
+ assert val.snode.parent(3) == blk1
+ assert val.snode.parent(4) == ti.root
+ assert val.snode._path_from_root() == [
+ ti.root, blk1, blk2, blk3, val.snode
+ ]
+
+
+@test_utils.test()
+def test_parent_exceeded():
+ val = ti.field(ti.f32)
+
+ m = 7
+ n = 3
+
+ blk1 = ti.root.dense(ti.i, m)
+ blk2 = blk1.dense(ti.j, n)
+ blk2.place(val)
+
+ assert val.snode.parent() == blk2
+ assert val.snode.parent(2) == blk1
+ assert val.snode.parent(3) == ti.root
+ assert val.snode.parent(4) == None
+ assert val.snode.parent(42) == None
+
+ assert ti.root.parent() == None
diff --git a/tests/_python_orig/test_test.py b/tests/_python_orig/test_test.py
new file mode 100644
index 000000000..c5444904c
--- /dev/null
+++ b/tests/_python_orig/test_test.py
@@ -0,0 +1,141 @@
+'''
+This file tests if Taichi's testing utilities are functional.
+
+TODO: Skips these tests after all tests are using @ti.test
+'''
+import os
+
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+### `ti.test`
+
+
+@test_utils.test()
+def test_all_archs():
+ assert ti.lang.impl.current_cfg().arch in test_utils.expected_archs()
+
+
+@test_utils.test(arch=ti.cpu)
+def test_arch_cpu():
+ assert ti.lang.impl.current_cfg().arch in [ti.cpu]
+
+
+@test_utils.test(arch=[ti.cpu])
+def test_arch_list_cpu():
+ assert ti.lang.impl.current_cfg().arch in [ti.cpu]
+
+
+@test_utils.test(exclude=ti.cpu)
+def test_exclude_cpu():
+ assert ti.lang.impl.current_cfg().arch not in [ti.cpu]
+
+
+@test_utils.test(exclude=[ti.cpu])
+def test_exclude_list_cpu():
+ assert ti.lang.impl.current_cfg().arch not in [ti.cpu]
+
+
+@test_utils.test(arch=ti.opengl)
+def test_arch_opengl():
+ assert ti.lang.impl.current_cfg().arch in [ti.opengl]
+
+
+@test_utils.test(arch=[ti.cpu, ti.opengl, ti.metal])
+def test_multiple_archs():
+ assert ti.lang.impl.current_cfg().arch in [ti.cpu, ti.opengl, ti.metal]
+
+
+@test_utils.test(arch=ti.cpu, debug=True, advanced_optimization=False)
+def test_init_args():
+ assert ti.lang.impl.current_cfg().debug == True
+ assert ti.lang.impl.current_cfg().advanced_optimization == False
+
+
+@test_utils.test(require=ti.extension.sparse)
+def test_require_extensions_1():
+ assert ti.lang.impl.current_cfg().arch in [ti.cpu, ti.cuda, ti.metal]
+
+
+@test_utils.test(arch=[ti.cpu, ti.opengl], require=ti.extension.sparse)
+def test_require_extensions_2():
+ assert ti.lang.impl.current_cfg().arch in [ti.cpu]
+
+
+@test_utils.test(arch=[ti.cpu, ti.opengl],
+ require=[ti.extension.sparse, ti.extension.bls])
+def test_require_extensions_2():
+ assert ti.lang.impl.current_cfg().arch in [ti.cuda]
+
+
+### `test_utils.approx` and `test_utils.allclose`
+
+
+@pytest.mark.parametrize('x', [0.1, 3])
+@pytest.mark.parametrize(
+ 'allclose', [test_utils.allclose, lambda x, y: x == test_utils.approx(y)])
+@test_utils.test()
+def test_allclose_rel(x, allclose):
+ rel = test_utils.get_rel_eps()
+ assert not allclose(x + x * rel * 3.0, x)
+ assert not allclose(x + x * rel * 1.2, x)
+ assert allclose(x + x * rel * 0.9, x)
+ assert allclose(x + x * rel * 0.5, x)
+ assert allclose(x, x)
+ assert allclose(x - x * rel * 0.5, x)
+ assert allclose(x - x * rel * 0.9, x)
+ assert not allclose(x - x * rel * 1.2, x)
+ assert not allclose(x - x * rel * 3.0, x)
+
+
+@pytest.mark.parametrize('x', [0.1, 3])
+@pytest.mark.parametrize(
+ 'allclose', [test_utils.allclose, lambda x, y: x == test_utils.approx(y)])
+@test_utils.test()
+def test_allclose_rel_reordered1(x, allclose):
+ rel = test_utils.get_rel_eps()
+ assert not allclose(x + x * rel * 3.0, x)
+ assert not allclose(x + x * rel * 1.2, x)
+ assert allclose(x + x * rel * 0.9, x)
+ assert allclose(x + x * rel * 0.5, x)
+ assert allclose(x, x)
+ assert allclose(x - x * rel * 0.5, x)
+ assert allclose(x - x * rel * 0.9, x)
+ assert not allclose(x - x * rel * 1.2, x)
+ assert not allclose(x - x * rel * 3.0, x)
+
+
+@pytest.mark.parametrize('x', [0.1, 3])
+@pytest.mark.parametrize(
+ 'allclose', [test_utils.allclose, lambda x, y: x == test_utils.approx(y)])
+@test_utils.test()
+def test_allclose_rel_reordered2(x, allclose):
+ rel = test_utils.get_rel_eps()
+ assert not allclose(x + x * rel * 3.0, x)
+ assert not allclose(x + x * rel * 1.2, x)
+ assert allclose(x + x * rel * 0.9, x)
+ assert allclose(x + x * rel * 0.5, x)
+ assert allclose(x, x)
+ assert allclose(x - x * rel * 0.5, x)
+ assert allclose(x - x * rel * 0.9, x)
+ assert not allclose(x - x * rel * 1.2, x)
+ assert not allclose(x - x * rel * 3.0, x)
+
+
+@pytest.mark.skipif(ti._lib.core.with_metal(),
+ reason="Skip metal because metal is used as the example")
+def test_disable_fallback():
+ old_environ = os.environ.get('TI_WANTED_ARCHS', '')
+ os.environ['TI_WANTED_ARCHS'] = "metal"
+
+ with pytest.raises(RuntimeError):
+
+ @test_utils.test(ti.metal)
+ def test():
+ pass
+
+ test()
+ os.environ['TI_WANTED_ARCHS'] = old_environ
+ os.environ['TI_WANTED_ARCHS'] = old_environ
diff --git a/tests/_python_orig/test_threading.py b/tests/_python_orig/test_threading.py
new file mode 100644
index 000000000..67980c6a8
--- /dev/null
+++ b/tests/_python_orig/test_threading.py
@@ -0,0 +1,9 @@
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_while():
+ assert ti._lib.core.test_threading()
diff --git a/tests/_python_orig/test_torch_ad.py b/tests/_python_orig/test_torch_ad.py
new file mode 100644
index 000000000..d2422b6ff
--- /dev/null
+++ b/tests/_python_orig/test_torch_ad.py
@@ -0,0 +1,97 @@
+import sys
+
+import numpy as np
+import pytest
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+if has_pytorch():
+ import torch
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=ti.opengl)
+def test_torch_ad():
+ n = 32
+
+ x = ti.field(ti.f32, shape=n, needs_grad=True)
+ y = ti.field(ti.f32, shape=n, needs_grad=True)
+
+ @ti.kernel
+ def torch_kernel():
+ for i in range(n):
+ # Do whatever complex operations here
+ y[n - i - 1] = x[i] * x[i]
+
+ class Sqr(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, inp):
+ x.from_torch(inp)
+ torch_kernel()
+ outp = y.to_torch()
+ return outp
+
+ @staticmethod
+ def backward(ctx, outp_grad):
+ ti.clear_all_gradients()
+ y.grad.from_torch(outp_grad)
+ torch_kernel.grad()
+ inp_grad = x.grad.to_torch()
+ return inp_grad
+
+ sqr = Sqr.apply
+ for i in range(10):
+ X = torch.tensor(2 * np.ones((n, ), dtype=np.float32),
+ requires_grad=True)
+ sqr(X).sum().backward()
+ ret = X.grad.cpu().numpy()
+ for j in range(n):
+ assert ret[j] == 4
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@pytest.mark.skipif(sys.platform == 'win32', reason='not working on Windows.')
+@test_utils.test(exclude=ti.opengl)
+def test_torch_ad_gpu():
+ if not torch.cuda.is_available():
+ return
+
+ device = torch.device('cuda:0')
+ n = 32
+
+ x = ti.field(ti.f32, shape=n, needs_grad=True)
+ y = ti.field(ti.f32, shape=n, needs_grad=True)
+
+ @ti.kernel
+ def torch_kernel():
+ for i in range(n):
+ # Do whatever complex operations here
+ y[n - i - 1] = x[i] * x[i]
+
+ class Sqr(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, inp):
+ x.from_torch(inp)
+ torch_kernel()
+ outp = y.to_torch(device=device)
+ return outp
+
+ @staticmethod
+ def backward(ctx, outp_grad):
+ ti.clear_all_gradients()
+ y.grad.from_torch(outp_grad)
+ torch_kernel.grad()
+ inp_grad = x.grad.to_torch(device=device)
+ return inp_grad
+
+ sqr = Sqr.apply
+ for i in range(10):
+ X = torch.tensor(2 * np.ones((n, ), dtype=np.float32),
+ requires_grad=True,
+ device=device)
+ sqr(X).sum().backward()
+ ret = X.grad.cpu().numpy()
+ for j in range(n):
+ assert ret[j] == 4
diff --git a/tests/_python_orig/test_torch_io.py b/tests/_python_orig/test_torch_io.py
new file mode 100644
index 000000000..fc6182efb
--- /dev/null
+++ b/tests/_python_orig/test_torch_io.py
@@ -0,0 +1,288 @@
+import numpy as np
+import pytest
+from taichi.lang import impl
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+if has_pytorch():
+ import torch
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io_devices():
+ n = 32
+ x = ti.field(dtype=ti.i32, shape=n)
+
+ @ti.kernel
+ def load(y: ti.ext_arr()):
+ for i in x:
+ x[i] = y[i] + 10
+
+ @ti.kernel
+ def inc():
+ for i in x:
+ x[i] += i
+
+ @ti.kernel
+ def store(y: ti.ext_arr()):
+ for i in x:
+ y[i] = x[i] * 2
+
+ devices = ['cpu']
+ if torch.cuda.is_available():
+ devices.append('cuda:0')
+ for device in devices:
+ y = torch.Tensor(np.ones(shape=n, dtype=np.int32)).to(device)
+
+ load(y)
+ inc()
+ store(y)
+
+ y = y.cpu().numpy()
+
+ for i in range(n):
+ assert y[i] == (11 + i) * 2
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io():
+ n = 32
+
+ @ti.kernel
+ def torch_kernel(t: ti.ext_arr(), o: ti.ext_arr()):
+ for i in range(n):
+ o[i] = t[i] * t[i]
+
+ @ti.kernel
+ def torch_kernel_2(t_grad: ti.ext_arr(), t: ti.ext_arr(),
+ o_grad: ti.ext_arr()):
+ for i in range(n):
+ t_grad[i] = 2 * t[i] * o_grad[i]
+
+ class Sqr(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, inp):
+ outp = torch.zeros_like(inp)
+ ctx.save_for_backward(inp)
+ torch_kernel(inp, outp)
+ return outp
+
+ @staticmethod
+ def backward(ctx, outp_grad):
+ outp_grad = outp_grad.contiguous()
+ inp_grad = torch.zeros_like(outp_grad)
+ inp, = ctx.saved_tensors
+ torch_kernel_2(inp_grad, inp, outp_grad)
+ return inp_grad
+
+ sqr = Sqr.apply
+ X = torch.tensor(2 * np.ones((n, ), dtype=np.float32), requires_grad=True)
+ sqr(X).sum().backward()
+ ret = X.grad.cpu()
+ for i in range(n):
+ assert ret[i] == 4
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io_2d():
+ n = 32
+
+ @ti.kernel
+ def torch_kernel(t: ti.ext_arr(), o: ti.ext_arr()):
+ for i in range(n):
+ for j in range(n):
+ o[i, j] = t[i, j] * t[i, j]
+
+ class Sqr(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, inp):
+ outp = torch.zeros_like(inp)
+ torch_kernel(inp, outp)
+ return outp
+
+ sqr = Sqr.apply
+ X = torch.tensor(2 * np.ones((n, n), dtype=np.float32), requires_grad=True)
+ val = sqr(X).sum()
+ assert val == 2 * 2 * n * n
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io_3d():
+ n = 16
+
+ @ti.kernel
+ def torch_kernel(t: ti.ext_arr(), o: ti.ext_arr()):
+ for i in range(n):
+ for j in range(n):
+ for k in range(n):
+ o[i, j, k] = t[i, j, k] * t[i, j, k]
+
+ class Sqr(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, inp):
+ outp = torch.zeros_like(inp)
+ torch_kernel(inp, outp)
+ return outp
+
+ sqr = Sqr.apply
+ X = torch.tensor(2 * np.ones((n, n, n), dtype=np.float32),
+ requires_grad=True)
+ val = sqr(X).sum()
+ assert val == 2 * 2 * n * n * n
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io_simple():
+ n = 32
+
+ x1 = ti.field(ti.f32, shape=(n, n))
+ t1 = torch.tensor(2 * np.ones((n, n), dtype=np.float32))
+
+ x2 = ti.Matrix.field(2, 3, ti.f32, shape=(n, n))
+ t2 = torch.tensor(2 * np.ones((n, n, 2, 3), dtype=np.float32))
+
+ x1.from_torch(t1)
+ for i in range(n):
+ for j in range(n):
+ assert x1[i, j] == 2
+
+ x2.from_torch(t2)
+ for i in range(n):
+ for j in range(n):
+ for k in range(2):
+ for l in range(3):
+ assert x2[i, j][k, l] == 2
+
+ t3 = x2.to_torch()
+ assert (t2 == t3).all()
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io_zeros():
+ mat = ti.Matrix.field(2, 6, dtype=ti.f32, shape=(), needs_grad=True)
+ zeros = torch.zeros((2, 6))
+ zeros[1, 2] = 3
+ mat.from_torch(zeros + 1)
+
+ assert mat[None][1, 2] == 4
+
+ zeros = mat.to_torch()
+ assert zeros[1, 2] == 4
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_io_struct():
+ n = 16
+ x1 = ti.Struct.field({"a": ti.i32, "b": ti.f32}, shape=(n, ))
+ t1 = {
+ "a": torch.tensor(2 * np.ones(n, dtype=np.int32)),
+ "b": torch.tensor(3 * np.ones(n, dtype=np.float32))
+ }
+
+ x1.from_torch(t1)
+ for i in range(n):
+ assert x1[i].a == 2
+ assert x1[i].b == 3
+
+ t2 = x1.to_torch()
+ for k in t1:
+ assert (t1[k] == t2[k]).all()
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_fused_kernels():
+ n = 12
+ X = ti.Matrix.field(3, 2, ti.f32, shape=(n, n, n))
+ s = impl.get_runtime().get_num_compiled_functions()
+ t = X.to_torch()
+ assert impl.get_runtime().get_num_compiled_functions() == s + 1
+ X.from_torch(t)
+ assert impl.get_runtime().get_num_compiled_functions() == s + 2
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_device():
+ n = 12
+ X = ti.Matrix.field(3, 2, ti.f32, shape=(n, n, n))
+ assert X.to_torch(device='cpu').device == torch.device('cpu')
+
+ if torch.cuda.is_available():
+ assert X.to_torch(device='cuda:0').device == torch.device('cuda:0')
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_shape_matrix():
+ n = 12
+ x = ti.Matrix.field(3, 2, ti.f32, shape=(n, n))
+ X = x.to_torch()
+ for i in range(n):
+ for j in range(n):
+ for k in range(3):
+ for l in range(2):
+ X[i, j, k, l] = i * 10 + j + k * 100 + l * 1000
+
+ x.from_torch(X)
+ X1 = x.to_torch()
+ x.from_torch(X1)
+ X1 = x.to_torch()
+
+ assert (X == X1).all()
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_shape_vector():
+ n = 12
+ x = ti.Vector.field(3, ti.f32, shape=(n, n))
+ X = x.to_torch()
+ for i in range(n):
+ for j in range(n):
+ for k in range(3):
+ X[i, j, k] = i * 10 + j + k * 100
+
+ x.from_torch(X)
+ X1 = x.to_torch()
+ x.from_torch(X1)
+ X1 = x.to_torch()
+
+ assert (X == X1).all()
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_torch_zero():
+ @ti.kernel
+ def test_torch(arr: ti.ext_arr()):
+ pass
+
+ test_torch(torch.zeros((0), dtype=torch.int32))
+ test_torch(torch.zeros((0, 5), dtype=torch.int32))
+ test_torch(torch.zeros((5, 0, 5), dtype=torch.int32))
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_torch_view():
+ @ti.kernel
+ def copy(x: ti.any_arr(), y: ti.any_arr()):
+ for i, j in x:
+ y[i, j] = x[i, j]
+
+ x = torch.Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).T
+ y = ti.ndarray(int, (3, 3))
+
+ with pytest.raises(ValueError,
+ match=r'Non contiguous tensors are not supported'):
+ copy(x, y)
diff --git a/tests/_python_orig/test_tuple_assign.py b/tests/_python_orig/test_tuple_assign.py
new file mode 100644
index 000000000..03dc05bec
--- /dev/null
+++ b/tests/_python_orig/test_tuple_assign.py
@@ -0,0 +1,229 @@
+import pytest
+from taichi.lang.misc import get_host_arch_list
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_fibonacci():
+ @ti.kernel
+ def ti_fibonacci(n: ti.i32) -> ti.i32:
+ a, b = 0, 1
+ # This is to make the inner for loop serial on purpose...
+ for _ in range(1):
+ for i in range(n):
+ a, b = b, a + b
+ return b
+
+ def py_fibonacci(n):
+ a, b = 0, 1
+ for i in range(n):
+ a, b = b, a + b
+ return b
+
+ for n in range(5):
+ assert ti_fibonacci(n) == py_fibonacci(n)
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_assign2():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None], b[None] = 2, 3
+
+ func()
+ assert a[None] == 2
+ assert b[None] == 3
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_assign2_mismatch3():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None], b[None] = 2, 3, 4
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_assign2_mismatch1():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None], b[None] = 2
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_swap2():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None], b[None] = b[None], a[None]
+
+ a[None] = 2
+ b[None] = 3
+ func()
+ assert a[None] == 3
+ assert b[None] == 2
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_assign2_static():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ # XXX: why a, b = ti.static(b, a) doesn't work?
+ c, d = ti.static(b, a)
+ c[None], d[None] = 2, 3
+
+ func()
+ assert a[None] == 3
+ assert b[None] == 2
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_swap3():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+ c = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ a[None], b[None], c[None] = b[None], c[None], a[None]
+
+ a[None] = 2
+ b[None] = 3
+ c[None] = 4
+ func()
+ assert a[None] == 3
+ assert b[None] == 4
+ assert c[None] == 2
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_from_tuple():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+ c = ti.field(ti.f32, ())
+
+ list = [2, 3, 4]
+
+ @ti.kernel
+ def func():
+ a[None], b[None], c[None] = list
+
+ func()
+ assert a[None] == 2
+ assert b[None] == 3
+ assert c[None] == 4
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_mismatch_tuple():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ list = [2, 3, 4]
+
+ @ti.kernel
+ def func():
+ a[None], b[None] = list
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_from_vector():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+ c = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ vector = ti.Vector([2, 3, 4])
+ a[None], b[None], c[None] = vector
+
+ func()
+ assert a[None] == 2
+ assert b[None] == 3
+ assert c[None] == 4
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_mismatch_vector():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ vector = ti.Vector([2, 3, 4])
+ a[None], b[None] = vector
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_mismatch_type():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+
+ bad = 12
+
+ @ti.kernel
+ def func():
+ a[None], b[None] = bad
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_mismatch_matrix():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+ c = ti.field(ti.f32, ())
+ d = ti.field(ti.f32, ())
+
+ @ti.kernel
+ def func():
+ bad = ti.Matrix([[2, 3], [4, 5]])
+ a[None], b[None], c[None], d[None] = bad
+
+ with pytest.raises(ti.TaichiCompilationError):
+ func()
+
+
+@test_utils.test(arch=get_host_arch_list())
+def test_unpack_from_shape():
+ a = ti.field(ti.f32, ())
+ b = ti.field(ti.f32, ())
+ c = ti.field(ti.f32, ())
+ d = ti.field(ti.f32, (2, 3, 4))
+
+ @ti.kernel
+ def func():
+ a[None], b[None], c[None] = d.shape
+
+ func()
+ assert a[None] == 2
+ assert b[None] == 3
+ assert c[None] == 4
diff --git a/tests/_python_orig/test_type_check.py b/tests/_python_orig/test_type_check.py
new file mode 100644
index 000000000..0330e7be7
--- /dev/null
+++ b/tests/_python_orig/test_type_check.py
@@ -0,0 +1,82 @@
+import numpy as np
+import pytest
+from taichi.lang.util import has_pytorch
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(arch=ti.cpu)
+def test_unary_op():
+ @ti.kernel
+ def floor():
+ a = 1
+ b = ti.floor(a)
+
+ with pytest.raises(ti.TaichiTypeError,
+ match="'floor' takes real inputs only"):
+ floor()
+
+
+@test_utils.test(arch=ti.cpu)
+def test_binary_op():
+ @ti.kernel
+ def bitwise_float():
+ a = 1
+ b = 3.1
+ c = a & b
+
+ with pytest.raises(ti.TaichiTypeError,
+ match=r"unsupported operand type\(s\) for '&'"):
+ bitwise_float()
+
+
+@test_utils.test(arch=ti.cpu)
+def test_ternary_op():
+ @ti.kernel
+ def select():
+ a = 1.1
+ b = 3
+ c = 3.6
+ d = b if a else c
+
+ with pytest.raises(TypeError,
+ match="`if` conditions must be of type int32"):
+ select()
+
+
+@pytest.mark.skipif(not has_pytorch(), reason='Pytorch not installed.')
+@test_utils.test(arch=[ti.cpu, ti.opengl])
+def test_subscript():
+ a = ti.ndarray(ti.i32, shape=(10, 10))
+
+ @ti.kernel
+ def any_array(x: ti.any_arr()):
+ b = x[3, 1.1]
+
+ with pytest.raises(ti.TaichiTypeError, match="indices must be integers"):
+ any_array(a)
+
+
+@test_utils.test()
+def test_0d_ndarray():
+ @ti.kernel
+ def foo() -> ti.i32:
+ a = np.array(3, dtype=np.int32)
+ return a
+
+ assert foo() == 3
+
+
+@test_utils.test()
+def test_non_0d_ndarray():
+ @ti.kernel
+ def foo():
+ a = np.array([1])
+
+ with pytest.raises(
+ ti.TaichiTypeError,
+ match=
+ "Only 0-dimensional numpy array can be used to initialize a scalar expression"
+ ):
+ foo()
diff --git a/tests/_python_orig/test_types.py b/tests/_python_orig/test_types.py
new file mode 100644
index 000000000..2d1cb7851
--- /dev/null
+++ b/tests/_python_orig/test_types.py
@@ -0,0 +1,155 @@
+import pytest
+from taichi.lang import impl
+
+import taichi as ti
+from tests import test_utils
+
+_TI_TYPES = [ti.i8, ti.i16, ti.i32, ti.u8, ti.u16, ti.u32, ti.f32]
+_TI_64_TYPES = [ti.i64, ti.u64, ti.f64]
+
+
+def _test_type_assign_argument(dt):
+ x = ti.field(dt, shape=())
+
+ @ti.kernel
+ def func(value: dt):
+ x[None] = value
+
+ func(3)
+ assert x[None] == 3
+
+
+@pytest.mark.parametrize('dt', _TI_TYPES)
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_type_assign_argument(dt):
+ _test_type_assign_argument(dt)
+
+
+@pytest.mark.parametrize('dt', _TI_64_TYPES)
+@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64)
+def test_type_assign_argument64(dt):
+ _test_type_assign_argument(dt)
+
+
+def _test_type_operator(dt):
+ x = ti.field(dt, shape=())
+ y = ti.field(dt, shape=())
+ add = ti.field(dt, shape=())
+ mul = ti.field(dt, shape=())
+
+ @ti.kernel
+ def func():
+ add[None] = x[None] + y[None]
+ mul[None] = x[None] * y[None]
+
+ for i in range(0, 3):
+ for j in range(0, 3):
+ x[None] = i
+ y[None] = j
+ func()
+ assert add[None] == x[None] + y[None]
+ assert mul[None] == x[None] * y[None]
+
+
+@pytest.mark.parametrize('dt', _TI_TYPES)
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_type_operator(dt):
+ _test_type_operator(dt)
+
+
+@pytest.mark.parametrize('dt', _TI_64_TYPES)
+@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64)
+def test_type_operator64(dt):
+ _test_type_operator(dt)
+
+
+def _test_type_field(dt):
+ x = ti.field(dt, shape=(3, 2))
+
+ @ti.kernel
+ def func(i: ti.i32, j: ti.i32):
+ x[i, j] = 3
+
+ for i in range(0, 3):
+ for j in range(0, 2):
+ func(i, j)
+ assert x[i, j] == 3
+
+
+@pytest.mark.parametrize('dt', _TI_TYPES)
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_type_field(dt):
+ _test_type_field(dt)
+
+
+@pytest.mark.parametrize('dt', _TI_64_TYPES)
+@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64)
+def test_type_field64(dt):
+ _test_type_field(dt)
+
+
+def _test_overflow(dt, n):
+ a = ti.field(dt, shape=())
+ b = ti.field(dt, shape=())
+ c = ti.field(dt, shape=())
+
+ @ti.kernel
+ def func():
+ c[None] = a[None] + b[None]
+
+ a[None] = 2**n // 3
+ b[None] = 2**n // 3
+
+ func()
+
+ assert a[None] == 2**n // 3
+ assert b[None] == 2**n // 3
+
+ if ti.types.is_signed(dt):
+ assert c[None] == 2**n // 3 * 2 - (2**n) # overflows
+ else:
+ assert c[None] == 2**n // 3 * 2 # does not overflow
+
+
+@pytest.mark.parametrize('dt,n', [
+ (ti.i8, 8),
+ (ti.u8, 8),
+ (ti.i16, 16),
+ (ti.u16, 16),
+ (ti.i32, 32),
+ (ti.u32, 32),
+])
+@test_utils.test(exclude=[ti.opengl, ti.vulkan])
+def test_overflow(dt, n):
+ _test_overflow(dt, n)
+
+
+@pytest.mark.parametrize('dt,n', [
+ (ti.i64, 64),
+ (ti.u64, 64),
+])
+@test_utils.test(exclude=[ti.opengl, ti.vulkan], require=ti.extension.data64)
+def test_overflow64(dt, n):
+ _test_overflow(dt, n)
+
+
+@pytest.mark.parametrize('dt,val', [
+ (ti.u32, 0xffffffff),
+ (ti.u64, 0xffffffffffffffff),
+])
+@test_utils.test(require=ti.extension.data64)
+def test_uint_max(dt, val):
+ # https://github.com/taichi-dev/taichi/issues/2060
+ impl.get_runtime().default_ip = dt
+ N = 16
+ f = ti.field(dt, shape=N)
+
+ @ti.kernel
+ def run():
+ for i in f:
+ f[i] = val
+
+ run()
+ fs = f.to_numpy()
+ for f in fs:
+ assert f == val
diff --git a/tests/_python_orig/test_unary_ops.py b/tests/_python_orig/test_unary_ops.py
new file mode 100644
index 000000000..5aea17db3
--- /dev/null
+++ b/tests/_python_orig/test_unary_ops.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+import taichi as ti
+from tests import test_utils
+
+
+def _test_op(dt, taichi_op, np_op):
+ print('arch={} default_fp={}'.format(
+ ti.lang.impl.current_cfg().arch,
+ ti.lang.impl.current_cfg().default_fp))
+ n = 4
+ val = ti.field(dt, shape=n)
+
+ def f(i):
+ return i * 0.1 + 0.4
+
+ @ti.kernel
+ def fill():
+ for i in range(n):
+ val[i] = taichi_op(f(ti.cast(i, dt)))
+
+ fill()
+
+ # check that it is double precision
+ for i in range(n):
+ if dt == ti.f64:
+ assert abs(np_op(float(f(i))) - val[i]) < 1e-15
+ else:
+ assert abs(np_op(float(f(i))) -
+ val[i]) < 1e-6 if ti.lang.impl.current_cfg(
+ ).arch != ti.opengl and ti.lang.impl.current_cfg(
+ ).arch != ti.vulkan else 1e-5
+
+
+def test_f64_trig():
+ op_pairs = [
+ (ti.sin, np.sin),
+ (ti.cos, np.cos),
+ (ti.asin, np.arcsin),
+ (ti.acos, np.arccos),
+ (ti.tan, np.tan),
+ (ti.tanh, np.tanh),
+ (ti.exp, np.exp),
+ (ti.log, np.log),
+ ]
+ for dt in [ti.f32, ti.f64]:
+ for taichi_op, np_op in op_pairs:
+
+ @test_utils.test(
+ require=ti.extension.data64 if dt == ti.f64 else [],
+ default_fp=dt)
+ def wrapped():
+ _test_op(dt, taichi_op, np_op)
+
+ wrapped()
diff --git a/tests/_python_orig/test_while.py b/tests/_python_orig/test_while.py
new file mode 100644
index 000000000..a3c53e797
--- /dev/null
+++ b/tests/_python_orig/test_while.py
@@ -0,0 +1,42 @@
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test()
+def test_while():
+ x = ti.field(ti.f32)
+
+ N = 1
+
+ ti.root.dense(ti.i, N).place(x)
+
+ @ti.kernel
+ def func():
+ i = 0
+ s = 0
+ while i < 10:
+ s += i
+ i += 1
+ x[0] = s
+
+ func()
+ assert x[0] == 45
+
+
+@test_utils.test()
+def test_break():
+ ret = ti.field(ti.i32, shape=())
+
+ @ti.kernel
+ def func():
+ i = 0
+ s = 0
+ while True:
+ s += i
+ i += 1
+ if i > 10:
+ break
+ ret[None] = s
+
+ func()
+ assert ret[None] == 55