Skip to content

Commit

Permalink
[CPU][CUDA] Support bitmasked as the leaf block structure for `1x1x…
Browse files Browse the repository at this point in the history
…1` masks (#676)

* fixed CUDA misalignment; added test_bitmasked_bitmasked

* passed test_bitmasked_bitmasked

* restored all tests

* finalized. Thanks to @k-ye for the test case

* apply review patches

* comment and format
  • Loading branch information
yuanming-hu authored Mar 29, 2020
1 parent 58c64ba commit c2d1f85
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 17 deletions.
26 changes: 17 additions & 9 deletions taichi/codegen/codegen_llvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
#pragma once

#include <set>
#include <taichi/common/util.h>
#include <taichi/util/io.h>

#include "taichi/ir/ir.h"
#include "taichi/program/program.h"
#include "taichi/lang_util.h"

#include "taichi/llvm/llvm_codegen_utils.h"

TLANG_NAMESPACE_BEGIN
Expand Down Expand Up @@ -1428,8 +1424,11 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {

current_coordinates = new_coordinates;

// Additional compare if non-POT exists
auto nonpot_cond = tlctx->get_constant(true);
// exec_cond: safe-guard the execution of loop body:
// - if non-POT tensor dim exists, make sure we don't go out of bounds
// - if leaf block is bitmasked, make sure we only loop over active
// voxels
auto exec_cond = tlctx->get_constant(true);
auto snode = stmt->snode;

auto coord_object = RuntimeObject("PhysicalCoordinates", this,
Expand All @@ -1438,20 +1437,29 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
auto j = snode->physical_index_position[i];
if (!bit::is_power_of_two(snode->extractors[j].num_elements)) {
auto coord = coord_object.get("val", tlctx->get_constant(j));
nonpot_cond = builder->CreateAnd(
nonpot_cond,
exec_cond = builder->CreateAnd(
exec_cond,
builder->CreateICmp(
llvm::CmpInst::ICMP_SLT, coord,
tlctx->get_constant(snode->extractors[j].num_elements)));
}
}

if (snode->type == SNodeType::bitmasked) {
// test if current voxel is active or not
auto is_active = call(snode, element.get("element"), "is_active",
{builder->CreateLoad(loop_index)});
is_active = builder->CreateTrunc(is_active,
llvm::Type::getInt1Ty(*llvm_context));
exec_cond = builder->CreateAnd(exec_cond, is_active);
}

auto body_bb_tail =
BasicBlock::Create(*llvm_context, "loop_body_tail", func);
{
auto bounded_body_bb =
BasicBlock::Create(*llvm_context, "bound_guarded_loop_body", func);
builder->CreateCondBr(nonpot_cond, bounded_body_bb, body_bb_tail);
builder->CreateCondBr(exec_cond, bounded_body_bb, body_bb_tail);
builder->SetInsertPoint(bounded_body_bb);
// The real loop body
stmt->body->accept(this);
Expand Down
6 changes: 2 additions & 4 deletions taichi/llvm/llvm_codegen_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ void check_func_call_signature(llvm::Value *func,
for (int i = 0; i < num_params; i++) {
auto required = func_type->getFunctionParamType(i);
auto provided = arglist[i]->getType();
// TI_INFO(" required from context {}", (void *)&required->getContext());
// TI_INFO(" provided from context {}", (void *)&provided->getContext());
if (required != provided) {
// TI_INFO("Function : {}", std::string(func->getName()));
// TI_INFO(" Type : {}", type_name(func->getType()));
TI_INFO("Function : {}", std::string(func->getName()));
TI_INFO(" Type : {}", type_name(func->getType()));
if (&required->getContext() != &provided->getContext()) {
TI_INFO(" parameter {} types are from different contexts", i);
TI_INFO(" required from context {}",
Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/llvm/node_bitmasked.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ void Bitmasked_activate(Ptr meta, Ptr node, int i) {
auto element_size = StructMeta_get_element_size(smeta);
auto num_elements = Bitmasked_get_num_elements(meta, node);
auto data_section_size = element_size * num_elements;
auto mask_begin = (uint64 *)(node + data_section_size);
atomic_or_u64(&mask_begin[i / 64], 1UL << (i % 64));
auto mask_begin = (u32 *)(node + data_section_size);
atomic_or_u32(&mask_begin[i / 32], 1UL << (i % 32));
}

i32 Bitmasked_is_active(Ptr meta, Ptr node, int i) {
Expand Down
28 changes: 26 additions & 2 deletions tests/python/test_bitmasked.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def run():
@ti.kernel
def sum():
for i, j in x:
ti.atomic_add(c[None], ti.is_active(bm, [i, j]))
ti.atomic_add(s[None], x[i, j])
c[None] += ti.is_active(bm, [i, j])
s[None] += x[i, j]

run()
sum()
Expand Down Expand Up @@ -58,3 +58,27 @@ def func():

func()
assert s[None] == 256


@archs_support_bitmasked
def test_bitmasked_bitmasked():
    # Regression test for a bitmasked SNode nested directly under another
    # bitmasked SNode (the case this commit fixes): the struct-for below
    # must visit only voxels that are active at the leaf level.
    x = ti.var(ti.f32)
    s = ti.var(ti.i32)  # counter for how many cells the kernel visits

    n = 128

    # Two-level bitmasked layout: 128 top-level blocks, each holding a
    # 128-wide bitmasked leaf block where x is placed.
    ti.root.bitmasked(ti.i, n).bitmasked(ti.i, n).place(x)
    ti.root.place(s)

    @ti.kernel
    def func():
        # Struct-for over the sparse field x: presumably iterates active
        # cells only — that is exactly what the assert below checks.
        for i in x:
            s[None] += 1

    # Activate four cells: 0 and 127 fall in the first top-level block,
    # 256 and 257 in the third (256 // 128 == 2).
    x[0] = 1
    x[127] = 1
    x[256] = 1
    x[257] = 1

    func()
    # Only the four activated cells should have been visited.
    assert s[None] == 4

0 comments on commit c2d1f85

Please sign in to comment.