From 470aa8678f32f6a037a4e2b15a17746eda7d31d5 Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Tue, 29 Dec 2020 06:03:38 +0800 Subject: [PATCH 01/16] add test --- tests/python/test_bit_array_vectorization.py | 42 ++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index 9995575136457..0ac81c98d7e88 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -41,3 +41,45 @@ def verify(): init() assign_vectorized() verify() + + +@ti.test(require=ti.extension.quant, debug=True, cfg_optimization=False) +def test_offset_load(): + ci1 = ti.type_factory_.get_custom_int_type(1, False) + + x = ti.field(dtype=ci1) + y = ti.field(dtype=ci1) + z = ti.field(dtype=ci1) + + N = 4096 + n_blocks = 4 + bits = 32 + boundary_offset = 1024 + + block = ti.root.pointer(ti.ij, (n_blocks, n_blocks)) + block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks)))._bit_array( + ti.j, bits, num_bits=bits).place(x) + block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks)))._bit_array( + ti.j, bits, num_bits=bits).place(y) + + @ti.kernel + def init(): + for i, j in ti.ndrange((boundary_offset, N - boundary_offset), + (boundary_offset, N - boundary_offset)): + x[i, j] = ti.random(dtype=ti.i32) % 2 + + @ti.kernel + def assign_vectorized(): + ti.bit_vectorize(32) + for i, j in x: + y[i, j] = x[i - 1, j] + + @ti.kernel + def verify(): + for i, j in ti.ndrange((boundary_offset, N - boundary_offset), + (boundary_offset, N - boundary_offset)): + assert y[i, j] == x[i - 1, j] + + init() + assign_vectorized() + verify() From d1b229d91fd0c88a55cbdffea6a10814027d1972 Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Tue, 29 Dec 2020 06:18:32 +0800 Subject: [PATCH 02/16] update test --- tests/python/test_bit_array_vectorization.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index 0ac81c98d7e88..174b93500a298 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -73,12 +73,16 @@ def assign_vectorized(): ti.bit_vectorize(32) for i, j in x: y[i, j] = x[i - 1, j] + z[i, j] = x[i, j - 1] + @ti.kernel def verify(): for i, j in ti.ndrange((boundary_offset, N - boundary_offset), (boundary_offset, N - boundary_offset)): assert y[i, j] == x[i - 1, j] + assert z[i, j] == x[i, j - 1] + init() assign_vectorized() From 7d0cf37425d44fb4fd2a47aae4fc182f141ff721 Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Mon, 28 Dec 2020 17:22:33 -0500 Subject: [PATCH 03/16] [skip ci] enforce code format --- tests/python/test_bit_array_vectorization.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index 174b93500a298..a7d04f5ba5818 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -75,7 +75,6 @@ def assign_vectorized(): y[i, j] = x[i - 1, j] z[i, j] = x[i, j - 1] - @ti.kernel def verify(): for i, j in ti.ndrange((boundary_offset, N - boundary_offset), @@ -83,7 +82,6 @@ def verify(): assert y[i, j] == x[i - 1, j] assert z[i, j] == x[i, j - 1] - init() assign_vectorized() verify() From 33ea98de3f28a42040b589addb0207ed81e6a84b Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 03:39:19 +0800 Subject: [PATCH 04/16] pass -1 test --- taichi/transforms/bit_loop_vectorize.cpp | 46 ++++++++++++++++++++ tests/python/test_bit_array_vectorization.py | 10 +++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/taichi/transforms/bit_loop_vectorize.cpp b/taichi/transforms/bit_loop_vectorize.cpp index 20bd9879a209c..86ee35af56f65 100644 --- a/taichi/transforms/bit_loop_vectorize.cpp +++ b/taichi/transforms/bit_loop_vectorize.cpp @@ -6,6 +6,7 @@ #include "taichi/ir/statements.h" #include "taichi/ir/transforms.h" #include "taichi/ir/visitors.h" +#include "taichi/ir/analysis.h" TLANG_NAMESPACE_BEGIN @@ -13,6 +14,7 @@ class BitLoopVectorize : public IRVisitor { public: int bit_vectorize; bool in_struct_for_loop; + StructForStmt* loop_stmt; PrimitiveType *bit_array_physical_type; BitLoopVectorize() { @@ -20,6 +22,7 @@ class BitLoopVectorize : public IRVisitor { invoke_default_visitor = true; bit_vectorize = 1; in_struct_for_loop = false; + loop_stmt = nullptr; bit_array_physical_type = nullptr; } @@ -45,6 +48,47 @@ class BitLoopVectorize : public IRVisitor { DataType new_ret_type(ptr_physical_type); ptr->ret_type = new_ret_type; ptr->is_bit_vectorized = true; + // check if j has offset + if (ptr->indices.size() == 2) { + auto diff = irpass::analysis::value_diff_loop_index(ptr->indices[1], loop_stmt, 1); + // TODO: temporarily we only support [j - 1] and [j + 1] + // the general case should be easy to implement + if (diff.linear_related() && diff.high - diff.low == 1 && (diff.low == 1 || diff.low == -1)) { + // construct ptr to x[i, j] + auto indices = ptr->indices; + indices[1] = loop_stmt->body->statements[1].get(); + auto base_ptr = std::make_unique(ptr->snodes, indices); + base_ptr->ret_type = new_ret_type; + base_ptr->is_bit_vectorized = true; + // load x[i, j](base) and x[i, j + 1](offsetted) + DataType load_data_type(bit_array_physical_type); + auto load_base = std::make_unique(base_ptr.get()); + load_base->ret_type = load_data_type; + auto load_offsetted = std::make_unique(ptr); + load_offsetted->ret_type = load_data_type; + // create bit shift and bit and operations + auto base_shift_offset = std::make_unique(TypedConstant(1)); + auto base_shift_opcode = diff.low == -1 ? BinaryOpType::bit_shl : BinaryOpType::bit_sar; + auto base_shift_op = std::make_unique(base_shift_opcode, load_base.get(), base_shift_offset.get()); + + auto offsetted_shift_offset = std::make_unique(TypedConstant(bit_vectorize - 1)); + auto offsetted_shift_opcode = diff.low == -1 ? BinaryOpType::bit_sar : BinaryOpType::bit_shl; + auto offsetted_shift_op = std::make_unique(offsetted_shift_opcode, load_offsetted.get(), offsetted_shift_offset.get()); + + auto or_op = std::make_unique(BinaryOpType::bit_or, base_shift_op.get(), offsetted_shift_op.get()); + // modify IR + auto offsetted_shift_op_p = offsetted_shift_op.get(); + stmt->insert_before_me(std::move(base_ptr)); + stmt->insert_before_me(std::move(load_base)); + stmt->insert_before_me(std::move(load_offsetted)); + stmt->insert_before_me(std::move(base_shift_offset)); + stmt->insert_before_me(std::move(std::move(base_shift_op))); + stmt->insert_before_me(std::move(offsetted_shift_offset)); + stmt->insert_before_me(std::move(std::move(offsetted_shift_op))); + stmt->replace_with(or_op.get()); + offsetted_shift_op_p->insert_after_me(std::move(or_op)); + } + } } } } @@ -72,10 +116,12 @@ class BitLoopVectorize : public IRVisitor { int old_bit_vectorize = bit_vectorize; bit_vectorize = stmt->bit_vectorize; in_struct_for_loop = true; + loop_stmt = stmt; bit_array_physical_type = stmt->snode->physical_type; stmt->body->accept(this); bit_vectorize = old_bit_vectorize; in_struct_for_loop = false; + loop_stmt = nullptr; bit_array_physical_type = nullptr; } diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index a7cfd50589f63..d8af8817cb895 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -61,6 +61,8 @@ def test_offset_load(): ti.j, bits, num_bits=bits).place(x) block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks)))._bit_array( ti.j, bits, num_bits=bits).place(y) + block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks)))._bit_array( + ti.j, bits, num_bits=bits).place(z) @ti.kernel def init(): @@ -72,15 +74,15 @@ def init(): def assign_vectorized(): ti.bit_vectorize(32) for i, j in x: - y[i, j] = x[i - 1, j] - z[i, j] = x[i, j - 1] + y[i, j] = x[i, j + 1] + # z[i, j] = x[i, j - 1] @ti.kernel def verify(): for i, j in ti.ndrange((boundary_offset, N - boundary_offset), (boundary_offset, N - boundary_offset)): - assert y[i, j] == x[i - 1, j] - assert z[i, j] == x[i, j - 1] + assert y[i, j] == x[i, j + 1] + # assert z[i, j] == x[i, j - 1] init() assign_vectorized() From 8e5aff1019bfcefeacf1fc17f96ae7ab3beb773b Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Wed, 30 Dec 2020 14:39:49 -0500 Subject: [PATCH 05/16] [skip ci] enforce code format --- taichi/transforms/bit_loop_vectorize.cpp | 36 ++++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/taichi/transforms/bit_loop_vectorize.cpp b/taichi/transforms/bit_loop_vectorize.cpp index 86ee35af56f65..4b7dd04d5b29c 100644 --- a/taichi/transforms/bit_loop_vectorize.cpp +++ b/taichi/transforms/bit_loop_vectorize.cpp @@ -14,7 +14,7 @@ class BitLoopVectorize : public IRVisitor { public: int bit_vectorize; bool in_struct_for_loop; - StructForStmt* loop_stmt; + StructForStmt *loop_stmt; PrimitiveType *bit_array_physical_type; BitLoopVectorize() { @@ -50,14 +50,17 @@ class BitLoopVectorize : public IRVisitor { ptr->is_bit_vectorized = true; // check if j has offset if (ptr->indices.size() == 2) { - auto diff = irpass::analysis::value_diff_loop_index(ptr->indices[1], loop_stmt, 1); + auto diff = irpass::analysis::value_diff_loop_index(ptr->indices[1], + loop_stmt, 1); // TODO: temporarily we only support [j - 1] and [j + 1] - // the general case should be easy to implement - if (diff.linear_related() && diff.high - diff.low == 1 && (diff.low == 1 || diff.low == -1)) { + // the general case should be easy to implement + if (diff.linear_related() && diff.high - diff.low == 1 && + (diff.low == 1 || diff.low == -1)) { // construct ptr to x[i, j] auto indices = ptr->indices; indices[1] = loop_stmt->body->statements[1].get(); - auto base_ptr = std::make_unique(ptr->snodes, indices); + auto base_ptr = + std::make_unique(ptr->snodes, indices); base_ptr->ret_type = new_ret_type; base_ptr->is_bit_vectorized = true; // load x[i, j](base) and x[i, j + 1](offsetted) @@ -67,15 +70,24 @@ class BitLoopVectorize : public IRVisitor { auto load_offsetted = std::make_unique(ptr); load_offsetted->ret_type = load_data_type; // create bit shift and bit and operations - auto base_shift_offset = std::make_unique(TypedConstant(1)); - auto base_shift_opcode = diff.low == -1 ? BinaryOpType::bit_shl : BinaryOpType::bit_sar; - auto base_shift_op = std::make_unique(base_shift_opcode, load_base.get(), base_shift_offset.get()); + auto base_shift_offset = + std::make_unique(TypedConstant(1)); + auto base_shift_opcode = + diff.low == -1 ? BinaryOpType::bit_shl : BinaryOpType::bit_sar; + auto base_shift_op = std::make_unique( + base_shift_opcode, load_base.get(), base_shift_offset.get()); - auto offsetted_shift_offset = std::make_unique(TypedConstant(bit_vectorize - 1)); - auto offsetted_shift_opcode = diff.low == -1 ? BinaryOpType::bit_sar : BinaryOpType::bit_shl; - auto offsetted_shift_op = std::make_unique(offsetted_shift_opcode, load_offsetted.get(), offsetted_shift_offset.get()); + auto offsetted_shift_offset = + std::make_unique(TypedConstant(bit_vectorize - 1)); + auto offsetted_shift_opcode = + diff.low == -1 ? BinaryOpType::bit_sar : BinaryOpType::bit_shl; + auto offsetted_shift_op = std::make_unique( + offsetted_shift_opcode, load_offsetted.get(), + offsetted_shift_offset.get()); - auto or_op = std::make_unique(BinaryOpType::bit_or, base_shift_op.get(), offsetted_shift_op.get()); + auto or_op = std::make_unique( + BinaryOpType::bit_or, base_shift_op.get(), + offsetted_shift_op.get()); // modify IR auto offsetted_shift_op_p = offsetted_shift_op.get(); stmt->insert_before_me(std::move(base_ptr)); From d05d1a46df0065b24c7801acbcc0cb343b7bdc1c Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 03:56:44 +0800 Subject: [PATCH 06/16] test --- tests/python/test_bit_array_vectorization.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index d8af8817cb895..d07b55dc2cb6b 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -74,15 +74,15 @@ def init(): def assign_vectorized(): ti.bit_vectorize(32) for i, j in x: - y[i, j] = x[i, j + 1] - # z[i, j] = x[i, j - 1] + # y[i, j] = x[i, j + 1] + z[i, j] = x[i, j - 1] @ti.kernel def verify(): for i, j in ti.ndrange((boundary_offset, N - boundary_offset), (boundary_offset, N - boundary_offset)): - assert y[i, j] == x[i, j + 1] - # assert z[i, j] == x[i, j - 1] + # assert y[i, j] == x[i, j + 1] + assert z[i, j] == x[i, j - 1] init() assign_vectorized() From 96ad330dff4bf08081e745e282f47e82ab080d3c Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 03:59:48 +0800 Subject: [PATCH 07/16] remove redundant --- taichi/transforms/bit_loop_vectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taichi/transforms/bit_loop_vectorize.cpp b/taichi/transforms/bit_loop_vectorize.cpp index 4b7dd04d5b29c..5f2b1eb2241d4 100644 --- a/taichi/transforms/bit_loop_vectorize.cpp +++ b/taichi/transforms/bit_loop_vectorize.cpp @@ -94,9 +94,9 @@ class BitLoopVectorize : public IRVisitor { stmt->insert_before_me(std::move(load_base)); stmt->insert_before_me(std::move(load_offsetted)); stmt->insert_before_me(std::move(base_shift_offset)); - stmt->insert_before_me(std::move(std::move(base_shift_op))); + stmt->insert_before_me(std::move(base_shift_op)); stmt->insert_before_me(std::move(offsetted_shift_offset)); - stmt->insert_before_me(std::move(std::move(offsetted_shift_op))); + stmt->insert_before_me(std::move(offsetted_shift_op)); stmt->replace_with(or_op.get()); offsetted_shift_op_p->insert_after_me(std::move(or_op)); } From bd090b1ed5fdfadb451edf8cd858f523dd6a110f Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 15:13:09 +0800 Subject: [PATCH 08/16] fix offset index --- taichi/transforms/bit_loop_vectorize.cpp | 22 +++++++++++++++----- tests/python/test_bit_array_vectorization.py | 4 ++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/taichi/transforms/bit_loop_vectorize.cpp b/taichi/transforms/bit_loop_vectorize.cpp index 5f2b1eb2241d4..0e67980ae1d0f 100644 --- a/taichi/transforms/bit_loop_vectorize.cpp +++ b/taichi/transforms/bit_loop_vectorize.cpp @@ -54,7 +54,7 @@ class BitLoopVectorize : public IRVisitor { loop_stmt, 1); // TODO: temporarily we only support [j - 1] and [j + 1] // the general case should be easy to implement - if (diff.linear_related() && diff.high - diff.low == 1 && + if (diff.linear_related() && diff.certain() && (diff.low == 1 || diff.low == -1)) { // construct ptr to x[i, j] auto indices = ptr->indices; @@ -63,22 +63,31 @@ class BitLoopVectorize : public IRVisitor { std::make_unique(ptr->snodes, indices); base_ptr->ret_type = new_ret_type; base_ptr->is_bit_vectorized = true; - // load x[i, j](base) and x[i, j + 1](offsetted) + // load x[i, j](base) DataType load_data_type(bit_array_physical_type); auto load_base = std::make_unique(base_ptr.get()); load_base->ret_type = load_data_type; - auto load_offsetted = std::make_unique(ptr); + // load x[i, j + 1](offsetted) + // since we are doing vectorization, the actual data should be x[i, j + 32] + auto offset_constant = std::make_unique(TypedConstant(bit_vectorize)); + auto offset_index_opcode = diff.low == -1 ? BinaryOpType::sub : BinaryOpType::add; + auto offset_index = std::make_unique(offset_index_opcode, indices[1], offset_constant.get()); + indices[1] = offset_index.get(); + auto offset_ptr = std::make_unique(ptr->snodes, indices); + offset_ptr->ret_type = new_ret_type; + offset_ptr->is_bit_vectorized = true; + auto load_offsetted = std::make_unique(offset_ptr.get()); load_offsetted->ret_type = load_data_type; // create bit shift and bit and operations auto base_shift_offset = - std::make_unique(TypedConstant(1)); + std::make_unique(TypedConstant(load_data_type, 1)); auto base_shift_opcode = diff.low == -1 ? BinaryOpType::bit_shl : BinaryOpType::bit_sar; auto base_shift_op = std::make_unique( base_shift_opcode, load_base.get(), base_shift_offset.get()); auto offsetted_shift_offset = - std::make_unique(TypedConstant(bit_vectorize - 1)); + std::make_unique(TypedConstant(load_data_type, bit_vectorize - 1)); auto offsetted_shift_opcode = diff.low == -1 ? BinaryOpType::bit_sar : BinaryOpType::bit_shl; auto offsetted_shift_op = std::make_unique( @@ -92,6 +101,9 @@ class BitLoopVectorize : public IRVisitor { auto offsetted_shift_op_p = offsetted_shift_op.get(); stmt->insert_before_me(std::move(base_ptr)); stmt->insert_before_me(std::move(load_base)); + stmt->insert_before_me(std::move(offset_constant)); + stmt->insert_before_me(std::move(offset_index)); + stmt->insert_before_me(std::move(offset_ptr)); stmt->insert_before_me(std::move(load_offsetted)); stmt->insert_before_me(std::move(base_shift_offset)); stmt->insert_before_me(std::move(base_shift_op)); diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index d07b55dc2cb6b..b6574ab8c7cde 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -74,14 +74,14 @@ def init(): def assign_vectorized(): ti.bit_vectorize(32) for i, j in x: - # y[i, j] = x[i, j + 1] + y[i, j] = x[i, j + 1] z[i, j] = x[i, j - 1] @ti.kernel def verify(): for i, j in ti.ndrange((boundary_offset, N - boundary_offset), (boundary_offset, N - boundary_offset)): - # assert y[i, j] == x[i, j + 1] + assert y[i, j] == x[i, j + 1] assert z[i, j] == x[i, j - 1] init() From 96229dd5596472b112f67335b4d4983b719ef41f Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Thu, 31 Dec 2020 02:13:45 -0500 Subject: [PATCH 09/16] [skip ci] enforce code format --- taichi/transforms/bit_loop_vectorize.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/taichi/transforms/bit_loop_vectorize.cpp b/taichi/transforms/bit_loop_vectorize.cpp index 0e67980ae1d0f..84b284ad9986c 100644 --- a/taichi/transforms/bit_loop_vectorize.cpp +++ b/taichi/transforms/bit_loop_vectorize.cpp @@ -68,15 +68,21 @@ class BitLoopVectorize : public IRVisitor { auto load_base = std::make_unique(base_ptr.get()); load_base->ret_type = load_data_type; // load x[i, j + 1](offsetted) - // since we are doing vectorization, the actual data should be x[i, j + 32] - auto offset_constant = std::make_unique(TypedConstant(bit_vectorize)); - auto offset_index_opcode = diff.low == -1 ? BinaryOpType::sub : BinaryOpType::add; - auto offset_index = std::make_unique(offset_index_opcode, indices[1], offset_constant.get()); + // since we are doing vectorization, the actual data should be x[i, + // j + 32] + auto offset_constant = + std::make_unique(TypedConstant(bit_vectorize)); + auto offset_index_opcode = + diff.low == -1 ? BinaryOpType::sub : BinaryOpType::add; + auto offset_index = std::make_unique( + offset_index_opcode, indices[1], offset_constant.get()); indices[1] = offset_index.get(); - auto offset_ptr = std::make_unique(ptr->snodes, indices); + auto offset_ptr = + std::make_unique(ptr->snodes, indices); offset_ptr->ret_type = new_ret_type; offset_ptr->is_bit_vectorized = true; - auto load_offsetted = std::make_unique(offset_ptr.get()); + auto load_offsetted = + std::make_unique(offset_ptr.get()); load_offsetted->ret_type = load_data_type; // create bit shift and bit and operations auto base_shift_offset = @@ -86,8 +92,8 @@ class BitLoopVectorize : public IRVisitor { auto base_shift_op = std::make_unique( base_shift_opcode, load_base.get(), base_shift_offset.get()); - auto offsetted_shift_offset = - std::make_unique(TypedConstant(load_data_type, bit_vectorize - 1)); + auto offsetted_shift_offset = std::make_unique( + TypedConstant(load_data_type, bit_vectorize - 1)); auto offsetted_shift_opcode = diff.low == -1 ? BinaryOpType::bit_sar : BinaryOpType::bit_shl; auto offsetted_shift_op = std::make_unique( From eddb3f799b29575c5958ecc4c5bda01ea43806f3 Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 23:24:57 +0800 Subject: [PATCH 10/16] Update tests/python/test_bit_array_vectorization.py Co-authored-by: Yuanming Hu --- tests/python/test_bit_array_vectorization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index b6574ab8c7cde..4076f6787c203 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -43,7 +43,7 @@ def verify(): verify() -@ti.test(require=ti.extension.quant, debug=True, cfg_optimization=False) +@ti.test(require=ti.extension.quant) def test_offset_load(): ci1 = ti.type_factory_.get_custom_int_type(1, False) From c2fcfdbb68af742e49c2eb2674e13a7cce80b4ba Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 23:25:07 +0800 Subject: [PATCH 11/16] Update tests/python/test_bit_array_vectorization.py Co-authored-by: Yuanming Hu --- tests/python/test_bit_array_vectorization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index 4076f6787c203..1331aed2a45b3 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -45,7 +45,7 @@ def verify(): @ti.test(require=ti.extension.quant) def test_offset_load(): - ci1 = ti.type_factory_.get_custom_int_type(1, False) + ci1 = ti.type_factory.custom_int(1, False) x = ti.field(dtype=ci1) y = ti.field(dtype=ci1) From f947103e2ede762e49b3287b3187a64b665cac2d Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 23:25:20 +0800 Subject: [PATCH 12/16] Update tests/python/test_bit_array_vectorization.py Co-authored-by: Yuanming Hu --- tests/python/test_bit_array_vectorization.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index 1331aed2a45b3..c705c89d8a53a 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -55,6 +55,7 @@ def test_offset_load(): n_blocks = 4 bits = 32 boundary_offset = 1024 + assert boundary_offset >= N // n_blocks block = ti.root.pointer(ti.ij, (n_blocks, n_blocks)) block.dense(ti.ij, (N // n_blocks, N // (bits * n_blocks)))._bit_array( From ecdf788136e64b764d4b7654affa96bbe74eb35c Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 23:25:50 +0800 Subject: [PATCH 13/16] Update taichi/transforms/bit_loop_vectorize.cpp Co-authored-by: Yuanming Hu --- taichi/transforms/bit_loop_vectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taichi/transforms/bit_loop_vectorize.cpp b/taichi/transforms/bit_loop_vectorize.cpp index 84b284ad9986c..02336580cfc21 100644 --- a/taichi/transforms/bit_loop_vectorize.cpp +++ b/taichi/transforms/bit_loop_vectorize.cpp @@ -69,7 +69,7 @@ class BitLoopVectorize : public IRVisitor { load_base->ret_type = load_data_type; // load x[i, j + 1](offsetted) // since we are doing vectorization, the actual data should be x[i, - // j + 32] + // j + vectorization_width] auto offset_constant = std::make_unique(TypedConstant(bit_vectorize)); auto offset_index_opcode = From 39e210a1b2efdd382d2e7f61403ace80609f5c4e Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Thu, 31 Dec 2020 23:29:33 +0800 Subject: [PATCH 14/16] test all 4 offset cases --- tests/python/test_bit_array_vectorization.py | 21 ++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index c705c89d8a53a..115f387426f0e 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -72,19 +72,24 @@ def init(): x[i, j] = ti.random(dtype=ti.i32) % 2 @ti.kernel - def assign_vectorized(): + def assign_vectorized(dx: ti.template(), dy: ti.template()): ti.bit_vectorize(32) for i, j in x: - y[i, j] = x[i, j + 1] - z[i, j] = x[i, j - 1] + y[i, j] = x[i + dx, j + dy] + z[i, j] = x[i + dx, j + dy] @ti.kernel - def verify(): + def verify(dx: ti.template(), dy: ti.template()): for i, j in ti.ndrange((boundary_offset, N - boundary_offset), (boundary_offset, N - boundary_offset)): - assert y[i, j] == x[i, j + 1] - assert z[i, j] == x[i, j - 1] + assert y[i, j] == x[i + dx, j + dy] init() - assign_vectorized() - verify() + assign_vectorized(0, 1) + verify(0, 1) + assign_vectorized(1, 0) + verify(1, 0) + assign_vectorized(0, -1) + verify(0, -1) + assign_vectorized(-1, 0) + verify(-1, 0) \ No newline at end of file From 530c011dce1a7945b47635a05ebce72912438ba0 Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Thu, 31 Dec 2020 10:30:12 -0500 Subject: [PATCH 15/16] [skip ci] enforce code format --- tests/python/test_bit_array_vectorization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_bit_array_vectorization.py b/tests/python/test_bit_array_vectorization.py index 115f387426f0e..2e1b7dc0cbb9f 100644 --- a/tests/python/test_bit_array_vectorization.py +++ b/tests/python/test_bit_array_vectorization.py @@ -92,4 +92,4 @@ def verify(dx: ti.template(), dy: ti.template()): assign_vectorized(0, -1) verify(0, -1) assign_vectorized(-1, 0) - verify(-1, 0) \ No newline at end of file + verify(-1, 0) From 363ee9d4d0cbbba757d31ce38393d8909aa1619d Mon Sep 17 00:00:00 2001 From: Xuanda Yang Date: Fri, 1 Jan 2021 04:34:26 +0800 Subject: [PATCH 16/16] rerun