Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[test] Flaky test (don't merge) #6622

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
296 changes: 0 additions & 296 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,73 +105,6 @@ jobs:
CR_PAT: ${{ secrets.GITHUB_TOKEN }}
CI_SETUP_CMAKE_ARGS: -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DTI_WITH_OPENGL:BOOL=ON -DTI_WITH_CC:BOOL=ON -DTI_WITH_VULKAN:BOOL=ON -DTI_BUILD_TESTS:BOOL=ON

build_and_test_cpu_mac:
  # Builds Taichi on a self-hosted macOS runner and runs the unix test script
  # with the CPU and Vulkan backends requested.
  name: Build and Test macos (CPU)
  needs: check_files
  # 180 minutes when triggered by the '0 18 * * *' schedule, 120 otherwise.
  timeout-minutes: ${{ github.event.schedule != '0 18 * * *' && 120 || 180 }}
  strategy:
    matrix:
      include:
        - os: macos-10.15
          # Quoted so these stay strings: a bare 3.7 is a float, and bare
          # ON/OFF are booleans under YAML 1.1 loaders.
          python: '3.7'
          with_cc: 'OFF'
          with_cpp_tests: 'ON'
          wanted_archs: 'cpu,vulkan'
  runs-on:
    - self-hosted
    - ${{ matrix.os }}
  env:
    PY: ${{ matrix.python }}
  steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: '0'
        submodules: 'recursive'

    - name: Prepare Environment
      run: |
        . .github/workflows/scripts/common-utils.sh
        prepare-build-cache
      env:
        CI_PLATFORM: macos

    - name: Build & Install
      # Only build when the check_files job reported run_job == 'true'.
      if: needs.check_files.outputs.run_job == 'true'
      run: |
        # Use the molten-vk v1.1.10 downloaded from taichi assets
        brew uninstall molten-vk -f
        .github/workflows/scripts/unix-build.sh
      env:
        CXX: clang++
        # Folded scalar: the flags below are joined into one space-separated
        # line with no trailing newline.
        TAICHI_CMAKE_ARGS: >-
          -DTI_WITH_OPENGL:BOOL=OFF
          -DTI_WITH_CC:BOOL=${{ matrix.with_cc }}
          -DTI_WITH_VULKAN:BOOL=ON
          -DTI_WITH_C_API=ON
          -DTI_BUILD_TESTS:BOOL=${{ matrix.with_cpp_tests }}

    # [DEBUG] Copy this step around to enable debugging inside GitHub Actions
    # runner instances.
    # - name: Setup tmate session
    #   uses: mxschmitt/action-tmate@v3
    #   with:
    #     limit-access-to-actor: true

    - name: Test
      id: test
      if: needs.check_files.outputs.run_job == 'true'
      run: .github/workflows/scripts/unix_test.sh
      env:
        TI_WANTED_ARCHS: ${{ matrix.wanted_archs }}
        # The value doubles as the explanation; presumably any non-empty
        # string makes unix_test.sh skip the C++ tests -- TODO confirm.
        TI_SKIP_CPP_TESTS: 'Disabled because Vulkan is supported but not working on buildbot4'

    - name: Save wheel if test failed
      # Upload the built wheel only when the Test step itself failed.
      if: failure() && steps.test.conclusion == 'failure'
      uses: actions/upload-artifact@v3
      with:
        name: broken-wheel
        path: dist/*
        retention-days: 7

build_and_test_gpu_linux:
name: Build and Test (GPU)
needs: check_files
Expand Down Expand Up @@ -256,232 +189,3 @@ jobs:
name: bad-captures
path: taichi-release-tests/bad-compare/*
retention-days: 7


build_and_test_windows:
  # Builds and tests Taichi on a self-hosted Windows runner labelled for CUDA
  # and OpenGL, using the PowerShell build/test scripts.
  name: Build and Test Windows
  needs: check_files
  strategy:
    matrix:
      include:
        - os: windows-2019
          llvmVer: '15'
          archs: 'cpu,cuda,opengl'
          runsOn: [self-hosted, windows, cuda, OpenGL]
  runs-on: ${{ matrix.runsOn }}
  # 180 minutes when triggered by the '0 18 * * *' schedule, 90 otherwise.
  timeout-minutes: ${{ github.event.schedule != '0 18 * * *' && 90 || 180 }}
  steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: '0'
        submodules: 'recursive'

    - uses: actions/setup-python@v4
      with:
        # Quoted: a bare version number is parsed as a float.
        python-version: '3.7'

    - name: Build
      shell: pwsh
      # Runs unless check_files explicitly reported run_job == 'false'.
      if: ${{ needs.check_files.outputs.run_job != 'false' }}
      run: |
        .\.github\workflows\scripts\win_build.ps1 -llvmVer ${{ matrix.llvmVer }} -installVulkan -libsDir "$env:LocalAppData/buildbot"
      env:
        PY: '3.7'
        # Folded scalar: the flags below are joined into one space-separated
        # line with no trailing newline.
        TAICHI_CMAKE_ARGS: >-
          -DTI_WITH_OPENGL:BOOL=ON
          -DTI_WITH_DX11:BOOL=ON
          -DTI_WITH_DX12:BOOL=ON
          -DTI_WITH_CC:BOOL=OFF
          -DTI_BUILD_TESTS:BOOL=ON
          -DTI_WITH_C_API=ON

    - name: Test
      id: test
      shell: pwsh
      if: ${{ needs.check_files.outputs.run_job != 'false' }}
      run: |
        .\.github\workflows\scripts\win_test.ps1 -libsDir "$env:LocalAppData/buildbot"
      env:
        PY: '3.7'
        TI_WANTED_ARCHS: ${{ matrix.archs }}
        # Quoted so the variable is the literal string ON rather than a
        # boolean under YAML 1.1 loaders.
        TI_SKIP_VERSION_CHECK: 'ON'
        TI_DEVICE_MEMORY_GB: '1'
        TI_RUN_RELEASE_TESTS: '1'

    - name: Save wheel if test failed
      # Upload the built wheel only when the Test step itself failed.
      if: failure() && steps.test.conclusion == 'failure'
      uses: actions/upload-artifact@v3
      with:
        name: broken-wheel
        path: dist/*
        retention-days: 7

build_and_test_m1:
  # Builds and tests Taichi on a self-hosted Apple M1 runner with the CPU,
  # Metal and Vulkan backends requested.
  name: Build and Test (Apple M1)
  needs: check_files
  # 120 minutes when triggered by the '0 18 * * *' schedule, 60 otherwise.
  timeout-minutes: ${{ github.event.schedule != '0 18 * * *' && 60 || 120 }}
  strategy:
    matrix:
      include:
        - os: macos-latest
          # Quoted: a bare version number is parsed as a float.
          python: '3.8'
  defaults:
    run:
      # Every `run` step is launched through `arch -arch arm64e`.
      # https://github.com/actions/runner/issues/805#issuecomment-844426478
      shell: '/usr/bin/arch -arch arm64e /bin/bash --noprofile --norc -eo pipefail {0}'
  runs-on: [self-hosted, m1]
  steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: '0'
        submodules: 'recursive'

    - name: Build
      run: |
        [[ ${{needs.check_files.outputs.run_job}} == false ]] && exit 0
        . .github/workflows/scripts/common-utils.sh
        prepare-build-cache
        brew install molten-vk
        brew install llvm@15
        .github/workflows/scripts/unix-build.sh
      env:
        PY: ${{ matrix.python }}
        CXX: clang++
        # Folded scalar: the flags below are joined into one space-separated
        # line with no trailing newline.
        TAICHI_CMAKE_ARGS: >-
          -DTI_WITH_OPENGL:BOOL=OFF
          -DTI_WITH_CUDA:BOOL=OFF
          -DTI_WITH_CC:BOOL=OFF
          -DTI_WITH_VULKAN:BOOL=ON
          -DTI_BUILD_TESTS:BOOL=ON
          -DTI_WITH_C_API=ON

    - name: Test
      id: test
      run: |
        [[ ${{needs.check_files.outputs.run_job}} == false ]] && exit 0
        .github/workflows/scripts/unix_test.sh
      env:
        TI_WANTED_ARCHS: 'cpu,metal,vulkan'
        PY: ${{ matrix.python }}
        PLATFORM: 'm1'
        TI_RUN_RELEASE_TESTS: '1'

    - name: Save wheel if test failed
      # Upload the built wheel only when the Test step itself failed.
      if: failure() && steps.test.conclusion == 'failure'
      uses: actions/upload-artifact@v3
      with:
        name: broken-wheel
        path: dist/*
        retention-days: 7

    - name: Save Bad Captures
      # Same condition as the wheel upload; uploads the bad-compare directory
      # from taichi-release-tests.
      if: failure() && steps.test.conclusion == 'failure'
      uses: actions/upload-artifact@v3
      with:
        name: bad-captures
        path: taichi-release-tests/bad-compare/*
        retention-days: 7

build_android_demos:
  # Builds a host wheel and the Android artifacts inside CI docker images,
  # then runs the AOT, Unity and headless C-API demo scripts.
  name: Build Android Demos
  # Skip this job when testing the offline cache
  if: ${{ github.event.schedule != '0 18 * * *' }}
  needs: check_files
  runs-on: [self-hosted, Linux, cn]
  timeout-minutes: 60
  permissions:
    packages: read
    contents: read
  env:
    # Quoted so the address is unambiguously a string.
    REDIS_HOST: '172.16.5.8'
    PY: py39
  steps:
    - uses: actions/checkout@v3
      name: Checkout taichi
      with:
        fetch-depth: '0'
        submodules: 'recursive'

    - name: Prepare Environment
      # Folded scalar: lines at the same indentation are joined with spaces,
      # so the multi-line quoted echo argument is written to $GITHUB_ENV as a
      # single line. Keep every line at the same indentation.
      run: >-
        . .github/workflows/scripts/common-utils.sh;
        prepare-build-cache;

        TAICHI_WHEEL_DIR=$(mktemp -d);
        echo TAICHI_WHEEL_DIR=$TAICHI_WHEEL_DIR >> $GITHUB_ENV;
        chmod 0777 $TAICHI_WHEEL_DIR;

        echo CI_DOCKER_RUN_EXTRA_ARGS="
        -e REDIS_HOST
        -v $(pwd):/home/dev/taichi
        -v $TAICHI_WHEEL_DIR:/taichi-wheel
        " >> $GITHUB_ENV;

    - name: Build Host Taichi
      # Mounts the temp wheel dir over the container's dist/ directory.
      run: |
        . .github/workflows/scripts/common-utils.sh
        ci-docker-run --name taichi-build-host \
          -v $TAICHI_WHEEL_DIR:/home/dev/taichi/dist \
          registry.taichigraphics.com/taichidev-ubuntu18.04:v0.3.4 \
          /home/dev/taichi/.github/workflows/scripts/unix-build.sh
      env:
        TAICHI_CMAKE_ARGS: >-
          -DTI_WITH_OPENGL:BOOL=ON
          -DTI_WITH_CC:BOOL=OFF
          -DTI_WITH_VULKAN:BOOL=ON
          -DTI_WITH_C_API=OFF

    - name: Build For Android
      # Cleans the checkout and hands ownership to uid/gid 1000 before the
      # container runs (presumably the container's dev user -- TODO confirm).
      run: |
        . .github/workflows/scripts/common-utils.sh
        git clean -fxd
        chown -R 1000:1000 .
        ci-docker-run --name taichi-build-android \
          registry.taichigraphics.com/taichidev-androidsdk18.04:v0.0.6 \
          /home/dev/taichi/.github/workflows/scripts/android-build.sh
      env:
        TAICHI_CMAKE_ARGS: >-
          -DTI_WITH_OPENGL:BOOL=ON
          -DTI_WITH_CC:BOOL=OFF
          -DTI_WITH_VULKAN:BOOL=ON
          -DTI_WITH_LLVM:BOOL=OFF
          -DTI_WITH_C_API=ON

    - name: Test For Android AOT (export core)
      run: |
        . .github/workflows/scripts/common-utils.sh
        chown -R 1000:1000 .
        ci-docker-run-gpu --name taichi-test-android \
          registry.taichigraphics.com/taichidev-androidsdk18.04:v0.0.6 \
          /home/dev/taichi/.github/workflows/scripts/aot-demo.sh build-and-smoke-test-android-aot-demo

    - name: Prepare Unity Build Environment
      run: |
        . .github/workflows/scripts/common-utils.sh
        chown -R 1000:1000 .
        ci-docker-run --name taichi-prepare-unity-build-env \
          registry.taichigraphics.com/taichidev-androidsdk18.04:v0.0.6 \
          /home/dev/taichi/.github/workflows/scripts/aot-demo.sh prepare-unity-build-env

    - name: Build Taichi-UnityExample
      run: |
        . .github/workflows/scripts/common-utils.sh
        ci-docker-run --name taichi-build-unity-demo \
          registry.taichigraphics.com/unityci-editor:ubuntu-2020.3.14f1-android-1-with-secret-sauce \
          /home/dev/taichi/.github/workflows/scripts/aot-demo.sh build-unity-demo

    - name: Run Taichi-UnityExample (C-API)
      run: |
        . .github/workflows/scripts/common-utils.sh
        chown -R 1000:1000 .
        ci-docker-run --name taichi-run-unity-demo \
          registry.taichigraphics.com/taichidev-androidsdk18.04:v0.0.6 \
          /home/dev/taichi/.github/workflows/scripts/aot-demo.sh smoke-test-unity-demo

    - name: Build & Run C-API Headless Demos (Android)
      run: |
        . .github/workflows/scripts/common-utils.sh
        chown -R 1000:1000 .
        ci-docker-run-gpu --name taichi-test-capi-headless-demo \
          registry.taichigraphics.com/taichidev-androidsdk18.04:v0.0.6 \
          /home/dev/taichi/.github/workflows/scripts/aot-demo.sh build-and-test-headless-demo
6 changes: 6 additions & 0 deletions taichi/codegen/cuda/codegen_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,12 @@ FunctionType CUDAModuleToFunctionConverter::convert(
auto &mod = data.module;
auto &tasks = data.tasks;
#ifdef TI_WITH_CUDA
for (const auto &task : tasks) {
llvm::Function *func = mod->getFunction(task.name);
TI_ASSERT(func);
tlctx_->mark_function_as_cuda_kernel(func, task.block_dim);
}

auto jit = tlctx_->jit.get();
auto cuda_module =
jit->add_module(std::move(mod), executor_->get_config()->gpu_max_reg);
Expand Down
16 changes: 8 additions & 8 deletions taichi/codegen/llvm/codegen_llvm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2638,14 +2638,14 @@ LLVMCompiledTask TaskCodeGenLLVM::run_compilation() {
emit_to_module();
eliminate_unused_functions();

if (config.arch == Arch::cuda) {
// CUDA specific metadata
for (const auto &task : offloaded_tasks) {
llvm::Function *func = module->getFunction(task.name);
TI_ASSERT(func);
tlctx->mark_function_as_cuda_kernel(func, task.block_dim);
}
}
// if (config.arch == Arch::cuda) {
// // CUDA specific metadata
// for (const auto &task : offloaded_tasks) {
// llvm::Function *func = module->getFunction(task.name);
// TI_ASSERT(func);
// tlctx->mark_function_as_cuda_kernel(func, task.block_dim);
// }
// }

return {std::move(offloaded_tasks), std::move(module),
std::move(used_tree_ids), std::move(struct_for_tls_sizes)};
Expand Down
10 changes: 6 additions & 4 deletions taichi/runtime/llvm/llvm_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,10 +663,12 @@ void TaichiLLVMContext::insert_nvvm_annotation(llvm::Function *func,
float addrspace(1)*,
float addrspace(1)*)* @kernel, !"kernel", i32 1}
*******************************************************************/
auto ctx = get_this_thread_context();
llvm::Metadata *md_args[] = {llvm::ValueAsMetadata::get(func),
MDString::get(*ctx, key),
llvm::ValueAsMetadata::get(get_constant(val))};
auto ctx = &func->getParent()->getContext();
llvm::Metadata *md_args[] = {
llvm::ValueAsMetadata::get(func), MDString::get(*ctx, key),
llvm::ValueAsMetadata::get(llvm::ConstantInt::get(
*ctx, llvm::APInt(sizeof(val) * 8, (uint64)val,
std::is_signed_v<decltype(val)>)))};

MDNode *md_node = MDNode::get(*ctx, md_args);

Expand Down
7 changes: 5 additions & 2 deletions taichi/runtime/llvm/llvm_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ class TaichiLLVMContext {
llvm::Module *module,
std::function<bool(const std::string &)> export_indicator);

void mark_function_as_cuda_kernel(llvm::Function *func, int block_dim = 0);
static void mark_function_as_cuda_kernel(llvm::Function *func,
int block_dim = 0);

void fetch_this_thread_struct_module();
llvm::Module *get_this_thread_runtime_module();
Expand Down Expand Up @@ -153,7 +154,9 @@ class TaichiLLVMContext {

static int num_instructions(llvm::Function *func);

void insert_nvvm_annotation(llvm::Function *func, std::string key, int val);
static void insert_nvvm_annotation(llvm::Function *func,
std::string key,
int val);

std::unique_ptr<llvm::Module> clone_module_to_this_thread_context(
llvm::Module *module);
Expand Down
2 changes: 2 additions & 0 deletions tests/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ def test():
print(f'Running on Arch={arch}')
os.environ['TI_WANTED_ARCHS'] = arch

args.with_offline_cache = True
args.rerun_with_offline_cache = 1
if args.with_offline_cache:
run_count += args.rerun_with_offline_cache
args.timeout *= run_count
Expand Down