Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU][CUDA] Support bitmasked as the leaf block structure for 1x1x1 masks #676

Merged
merged 6 commits into from
Mar 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions taichi/codegen/codegen_llvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
#pragma once

#include <set>
#include <taichi/common/util.h>
#include <taichi/util/io.h>

#include "taichi/ir/ir.h"
#include "taichi/program/program.h"
#include "taichi/lang_util.h"

#include "taichi/llvm/llvm_codegen_utils.h"

TLANG_NAMESPACE_BEGIN
Expand Down Expand Up @@ -1428,8 +1424,11 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {

current_coordinates = new_coordinates;

// Additional compare if non-POT exists
auto nonpot_cond = tlctx->get_constant(true);
// exec_cond: safe-guard the execution of loop body:
// - if non-POT tensor dim exists, make sure we don't go out of bounds
// - if leaf block is bitmasked, make sure we only loop over active
// voxels
auto exec_cond = tlctx->get_constant(true);
auto snode = stmt->snode;

auto coord_object = RuntimeObject("PhysicalCoordinates", this,
Expand All @@ -1438,20 +1437,29 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
auto j = snode->physical_index_position[i];
if (!bit::is_power_of_two(snode->extractors[j].num_elements)) {
auto coord = coord_object.get("val", tlctx->get_constant(j));
nonpot_cond = builder->CreateAnd(
nonpot_cond,
exec_cond = builder->CreateAnd(
exec_cond,
builder->CreateICmp(
llvm::CmpInst::ICMP_SLT, coord,
tlctx->get_constant(snode->extractors[j].num_elements)));
}
}

if (snode->type == SNodeType::bitmasked) {
// test if current voxel is active or not
auto is_active = call(snode, element.get("element"), "is_active",
{builder->CreateLoad(loop_index)});
is_active = builder->CreateTrunc(is_active,
llvm::Type::getInt1Ty(*llvm_context));
exec_cond = builder->CreateAnd(exec_cond, is_active);
}

auto body_bb_tail =
BasicBlock::Create(*llvm_context, "loop_body_tail", func);
{
auto bounded_body_bb =
BasicBlock::Create(*llvm_context, "bound_guarded_loop_body", func);
builder->CreateCondBr(nonpot_cond, bounded_body_bb, body_bb_tail);
builder->CreateCondBr(exec_cond, bounded_body_bb, body_bb_tail);
builder->SetInsertPoint(bounded_body_bb);
// The real loop body
stmt->body->accept(this);
Expand Down
6 changes: 2 additions & 4 deletions taichi/llvm/llvm_codegen_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ void check_func_call_signature(llvm::Value *func,
for (int i = 0; i < num_params; i++) {
auto required = func_type->getFunctionParamType(i);
auto provided = arglist[i]->getType();
// TI_INFO(" required from context {}", (void *)&required->getContext());
// TI_INFO(" provided from context {}", (void *)&provided->getContext());
if (required != provided) {
// TI_INFO("Function : {}", std::string(func->getName()));
// TI_INFO(" Type : {}", type_name(func->getType()));
TI_INFO("Function : {}", std::string(func->getName()));
TI_INFO(" Type : {}", type_name(func->getType()));
if (&required->getContext() != &provided->getContext()) {
TI_INFO(" parameter {} types are from different contexts", i);
TI_INFO(" required from context {}",
Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/llvm/node_bitmasked.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ void Bitmasked_activate(Ptr meta, Ptr node, int i) {
auto element_size = StructMeta_get_element_size(smeta);
auto num_elements = Bitmasked_get_num_elements(meta, node);
auto data_section_size = element_size * num_elements;
auto mask_begin = (uint64 *)(node + data_section_size);
atomic_or_u64(&mask_begin[i / 64], 1UL << (i % 64));
auto mask_begin = (u32 *)(node + data_section_size);
atomic_or_u32(&mask_begin[i / 32], 1UL << (i % 32));
}

i32 Bitmasked_is_active(Ptr meta, Ptr node, int i) {
Expand Down
28 changes: 26 additions & 2 deletions tests/python/test_bitmasked.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def run():
@ti.kernel
def sum():
for i, j in x:
ti.atomic_add(c[None], ti.is_active(bm, [i, j]))
ti.atomic_add(s[None], x[i, j])
c[None] += ti.is_active(bm, [i, j])
s[None] += x[i, j]

run()
sum()
Expand Down Expand Up @@ -58,3 +58,27 @@ def func():

func()
assert s[None] == 256


@archs_support_bitmasked
def test_bitmasked_bitmasked():
    # Two-level bitmasked SNode tree: an outer bitmasked axis of 128 blocks,
    # each holding an inner bitmasked axis of 128 cells. Activating a cell
    # should activate exactly that voxel, and a struct-for over the leaf
    # field should visit only active voxels.
    arr = ti.var(ti.f32)
    visited = ti.var(ti.i32)

    dim = 128

    ti.root.bitmasked(ti.i, dim).bitmasked(ti.i, dim).place(arr)
    ti.root.place(visited)

    @ti.kernel
    def tally():
        # Struct-for loops only over active voxels of the bitmasked leaf.
        for idx in arr:
            visited[None] += 1

    # Touch two cells in the first outer block (0, 127) and two in the
    # third outer block (256, 257); writing activates the voxel.
    for pos in (0, 127, 256, 257):
        arr[pos] = 1

    tally()
    assert visited[None] == 4