Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[llvm] Add support for data types of different sizes on dynamic SNode #6490

Merged
merged 5 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions taichi/analysis/data_source_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ std::vector<Stmt *> get_store_destination(Stmt *store_stmt) {
return std::vector<Stmt *>(1, global_store->dest);
} else if (auto atomic = store_stmt->cast<AtomicOpStmt>()) {
return std::vector<Stmt *>(1, atomic->dest);
} else if (auto snode_op = store_stmt->cast<SNodeOpStmt>()) {
if (snode_op->op_type == SNodeOpType::allocate) {
return {snode_op->val, snode_op->ptr};
} else {
return {};
}
} else if (auto external_func = store_stmt->cast<ExternalFuncCallStmt>()) {
if (store_stmt->cast<ExternalFuncCallStmt>()->type ==
ExternalFuncCallStmt::BITCODE) {
Expand Down
10 changes: 6 additions & 4 deletions taichi/codegen/llvm/codegen_llvm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,11 +1367,13 @@ void TaskCodeGenLLVM::visit(AssertStmt *stmt) {

void TaskCodeGenLLVM::visit(SNodeOpStmt *stmt) {
auto snode = stmt->snode;
if (stmt->op_type == SNodeOpType::append) {
if (stmt->op_type == SNodeOpType::allocate) {
TI_ASSERT(snode->type == SNodeType::dynamic);
TI_ASSERT(stmt->ret_type->is_primitive(PrimitiveTypeID::i32));
llvm_val[stmt] =
call(snode, llvm_val[stmt->ptr], "append", {llvm_val[stmt->val]});
TI_ASSERT(stmt->ret_type.is_pointer() &&
stmt->ret_type.ptr_removed()->is_primitive(PrimitiveTypeID::gen));
auto ptr =
call(snode, llvm_val[stmt->ptr], "allocate", {llvm_val[stmt->val]});
llvm_val[stmt] = ptr;
} else if (stmt->op_type == SNodeOpType::length) {
TI_ASSERT(snode->type == SNodeType::dynamic);
llvm_val[stmt] = call(snode, llvm_val[stmt->ptr], "get_num_elements", {});
Expand Down
3 changes: 2 additions & 1 deletion taichi/ir/control_flow_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,8 @@ void ControlFlowGraph::reaching_definition_analysis(bool after_lower_access) {
(!after_lower_access &&
(stmt->is<GlobalPtrStmt>() || stmt->is<ExternalPtrStmt>() ||
stmt->is<BlockLocalPtrStmt>() || stmt->is<ThreadLocalPtrStmt>() ||
stmt->is<GlobalTemporaryStmt>() || stmt->is<MatrixPtrStmt>()))) {
stmt->is<GlobalTemporaryStmt>() || stmt->is<MatrixPtrStmt>() ||
stmt->is<GetChStmt>()))) {
// TODO: unify them
// A global pointer that may contain some data before this kernel.
nodes[start_node]->reach_gen.insert(stmt);
Expand Down
10 changes: 7 additions & 3 deletions taichi/ir/frontend_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,13 +926,17 @@ void SNodeOpExpression::flatten(FlattenContext *ctx) {
ctx->push_back<SNodeOpStmt>(SNodeOpType::get_addr, snode, ptr, nullptr);
} else if (op_type == SNodeOpType::append) {
flatten_rvalue(value, ctx);
ctx->push_back<SNodeOpStmt>(SNodeOpType::append, snode, ptr, value->stmt);

auto alloca = ctx->push_back<AllocaStmt>(PrimitiveType::i32);
auto addr =
ctx->push_back<SNodeOpStmt>(SNodeOpType::allocate, snode, ptr, alloca);
auto ch_addr = ctx->push_back<GetChStmt>(addr, snode, 0);
ctx->push_back<GlobalStoreStmt>(ch_addr, value->stmt);
ctx->push_back<LocalLoadStmt>(alloca);
TI_ERROR_IF(snode->type != SNodeType::dynamic,
"ti.append only works on dynamic nodes.");
TI_ERROR_IF(snode->ch.size() != 1,
"ti.append only works on single-child dynamic nodes.");
TI_ERROR_IF(data_type_size(snode->ch[0]->dt) != 4,
"ti.append only works on i32/f32 nodes.");
}
stmt = ctx->back_stmt();
}
Expand Down
13 changes: 12 additions & 1 deletion taichi/ir/statements.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ bool SNodeOpStmt::activation_related(SNodeOpType op) {
}

bool SNodeOpStmt::need_activation(SNodeOpType op) {
return op == SNodeOpType::activate || op == SNodeOpType::append;
return op == SNodeOpType::activate || op == SNodeOpType::append ||
op == SNodeOpType::allocate;
}

ExternalTensorShapeAlongAxisStmt::ExternalTensorShapeAlongAxisStmt(int axis,
Expand Down Expand Up @@ -273,6 +274,16 @@ GetChStmt::GetChStmt(Stmt *input_ptr, int chid, bool is_bit_vectorized)
TI_STMT_REG_FIELDS;
}

GetChStmt::GetChStmt(Stmt *input_ptr,
SNode *snode,
int chid,
bool is_bit_vectorized)
: input_ptr(input_ptr), chid(chid), is_bit_vectorized(is_bit_vectorized) {
input_snode = snode;
output_snode = input_snode->ch[chid].get();
TI_STMT_REG_FIELDS;
}

OffloadedStmt::OffloadedStmt(TaskType task_type, Arch arch)
: task_type(task_type), device(arch) {
if (has_body()) {
Expand Down
4 changes: 4 additions & 0 deletions taichi/ir/statements.h
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,10 @@ class GetChStmt : public Stmt {
bool is_bit_vectorized;

GetChStmt(Stmt *input_ptr, int chid, bool is_bit_vectorized = false);
GetChStmt(Stmt *input_ptr,
SNode *snode,
int chid,
bool is_bit_vectorized = false);

bool has_global_side_effect() const override {
return false;
Expand Down
1 change: 1 addition & 0 deletions taichi/ir/stmt_op_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ std::string snode_op_type_name(SNodeOpType type) {
REGISTER_TYPE(activate);
REGISTER_TYPE(deactivate);
REGISTER_TYPE(append);
REGISTER_TYPE(allocate);
REGISTER_TYPE(clear);
REGISTER_TYPE(undefined);

Expand Down
1 change: 1 addition & 0 deletions taichi/ir/stmt_op_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ enum class SNodeOpType : int {
activate,
deactivate,
append,
allocate,
clear,
undefined
};
Expand Down
9 changes: 4 additions & 5 deletions taichi/runtime/llvm/runtime_module/node_dynamic.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ void Dynamic_deactivate(Ptr meta_, Ptr node_) {
}
}

i32 Dynamic_append(Ptr meta_, Ptr node_, i32 data) {
Ptr Dynamic_allocate(Ptr meta_, Ptr node_, i32 *len) {
auto meta = (DynamicMeta *)(meta_);
auto node = (DynamicNode *)(node_);
auto chunk_size = meta->chunk_size;
auto i = atomic_add_i32(&node->n, 1);
*len = i;
int chunk_start = 0;
auto p_chunk_ptr = &node->ptr;
while (true) {
Expand All @@ -76,14 +77,12 @@ i32 Dynamic_append(Ptr meta_, Ptr node_, i32 data) {
});
}
if (i < chunk_start + chunk_size) {
*(i32 *)(*p_chunk_ptr + sizeof(Ptr) +
(i - chunk_start) * meta->element_size) = data;
break;
return *p_chunk_ptr + sizeof(Ptr) +
(i - chunk_start) * meta->element_size;
}
p_chunk_ptr = (Ptr *)(*p_chunk_ptr);
chunk_start += chunk_size;
}
return i;
lin-hitonami marked this conversation as resolved.
Show resolved Hide resolved
}

i32 Dynamic_is_active(Ptr meta_, Ptr node_, int i) {
Expand Down
3 changes: 3 additions & 0 deletions taichi/transforms/type_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ class TypeCheck : public IRVisitor {
void visit(SNodeOpStmt *stmt) override {
if (stmt->op_type == SNodeOpType::get_addr) {
stmt->ret_type = PrimitiveType::u64;
} else if (stmt->op_type == SNodeOpType::allocate) {
stmt->ret_type = PrimitiveType::gen;
stmt->ret_type.set_is_pointer(true);
} else {
stmt->ret_type = PrimitiveType::i32;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/python/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def func():
assert not a[11]


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_no_duplicate_eval():
a = ti.field(ti.i32)
ti.root.dynamic(ti.i, 256).place(a)
Expand Down
60 changes: 48 additions & 12 deletions tests/python/test_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tests import test_utils


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dynamic():
x = ti.field(ti.f32)
n = 128
Expand All @@ -23,7 +23,7 @@ def func():
assert x[i] == i


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dynamic2():
x = ti.field(ti.f32)
n = 128
Expand All @@ -41,7 +41,7 @@ def func():
assert x[i] == i


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dynamic_matrix():
x = ti.Matrix.field(2, 1, dtype=ti.i32)
n = 8192
Expand All @@ -64,7 +64,7 @@ def func():
assert b == 0


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_append():
x = ti.field(ti.i32)
n = 128
Expand All @@ -86,7 +86,7 @@ def func():
assert elements[i] == i


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_length():
x = ti.field(ti.i32)
y = ti.field(ti.f32, shape=())
Expand All @@ -110,7 +110,7 @@ def get_len():
assert y[None] == n


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_append_ret_value():
x = ti.field(ti.i32)
y = ti.field(ti.i32)
Expand All @@ -135,14 +135,14 @@ def func():
assert x[i] + 3 == z[i]


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dense_dynamic():
# The spin lock implementation has triggered a bug in CUDA, the end result
# being that appending to Taichi's dynamic node messes up its length. See
# https://stackoverflow.com/questions/65995357/cuda-spinlock-implementation-with-independent-thread-scheduling-supported
# CUDA 11.2 didn't fix this bug, unfortunately.
if ti.lang.impl.current_cfg().arch == ti.cuda:
pytest.skip('CUDA spinlock bug')
# if ti.lang.impl.current_cfg().arch == ti.cuda:
lin-hitonami marked this conversation as resolved.
Show resolved Hide resolved
# pytest.skip('CUDA spinlock bug')

n = 128
x = ti.field(ti.i32)
Expand All @@ -166,7 +166,7 @@ def func():
assert l[i] == n


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dense_dynamic_len():
n = 128
x = ti.field(ti.i32)
Expand All @@ -185,7 +185,7 @@ def func():
assert l[i] == 0


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dynamic_activate():
# record the lengths
l = ti.field(ti.i32, 3)
Expand All @@ -212,7 +212,7 @@ def func():
assert l[2] == 21


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_append_vec():
x = ti.Vector.field(3, ti.f32)
block = ti.root.dense(ti.i, 16)
Expand All @@ -229,3 +229,39 @@ def make_lists():
with pytest.raises(TaichiCompilationError,
match=r'append only supports appending a scalar value'):
make_lists()


@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_append_u8():
x = ti.field(ti.u8)
pixel = ti.root.dynamic(ti.j, 20)
pixel.place(x)

@ti.kernel
def make_list():
ti.loop_config(serialize=True)
for i in range(20):
x[()].append(i * i * i)

make_list()

for i in range(20):
assert x[i] == i * i * i % 256


@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_append_u64():
x = ti.field(ti.u64)
pixel = ti.root.dynamic(ti.i, 20)
pixel.place(x)

@ti.kernel
def make_list():
ti.loop_config(serialize=True)
for i in range(20):
x[()].append(i * i * i * ti.u64(10000000000))

make_list()

for i in range(20):
assert x[i] == i * i * i * 10000000000
2 changes: 1 addition & 1 deletion tests/python/test_dynamic_append_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test():
test()


@test_utils.test(exclude=[ti.cc, ti.opengl, ti.vulkan],
@test_utils.test(exclude=[ti.cc, ti.opengl, ti.vulkan, ti.metal],
default_fp=ti.f32,
debug=True)
def test_dynamic_append_length_f32():
Expand Down
2 changes: 1 addition & 1 deletion tests/python/test_sparse_deactivate.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def clear_temp():
assert xn[i, j] == i + j


@test_utils.test(require=ti.extension.sparse)
@test_utils.test(require=ti.extension.sparse, exclude=[ti.metal])
def test_dynamic():
x = ti.field(ti.i32)
s = ti.field(ti.i32)
Expand Down
4 changes: 2 additions & 2 deletions tests/python/test_struct_for_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from tests import test_utils


@test_utils.test(exclude=[ti.opengl, ti.cc, ti.vulkan])
@test_utils.test(exclude=[ti.opengl, ti.cc, ti.vulkan, ti.metal])
def test_dynamic():
x = ti.field(ti.i32)
y = ti.field(ti.i32, shape=())
Expand All @@ -23,7 +23,7 @@ def count():
assert y[None] == n // 3 + 1


@test_utils.test(exclude=[ti.opengl, ti.cc, ti.vulkan])
@test_utils.test(exclude=[ti.opengl, ti.cc, ti.vulkan, ti.metal])
def test_dense_dynamic():
n = 128

Expand Down