Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Opt] Identical store/load elimination by control-flow graph #1741

Merged
merged 4 commits into from
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 113 additions & 9 deletions taichi/ir/control_flow_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,14 @@ Stmt *CFGNode::get_store_forwarding_data(Stmt *var, int position) const {
return result;
}
Stmt *result = nullptr;
bool result_visible = false;
// Decides whether `stmt` can be used as the forwarded value at `position`.
// A statement whose parent is `block` qualifies only if it is located
// strictly before `position`; any other statement qualifies only if its
// parent block is found in `parent_blocks`.
auto visible = [&](Stmt *stmt) {
// Same block: `stmt` must come before `position`.
if (stmt->parent == block) {
return stmt->parent->locate(stmt) < position;
}
// TODO: What if `stmt` appears in an ancestor of `block` but after
// `position`?
// NOTE(review): `parent_blocks` is built outside this view; presumably it
// holds the ancestor blocks of `block` -- confirm.
return parent_blocks.find(stmt->parent) != parent_blocks.end();
};
auto update_result = [&](Stmt *stmt) {
Expand All @@ -190,16 +196,22 @@ Stmt *CFGNode::get_store_forwarding_data(Stmt *var, int position) const {
}
if (!result) {
result = data;
} else if (!irpass::analysis::same_statements(result, data)) {
result_visible = visible(data);
return true; // continue the following loops
}
if (!irpass::analysis::same_statements(result, data)) {
// check the special case of alloca (initialized to 0)
if (!(result->is<AllocaStmt>() && data->is<ConstStmt>() &&
data->width() == 1 &&
data->as<ConstStmt>()->val[0].equal_value(0))) {
return false; // return nullptr
}
}
if (visible(data))
if (!result_visible && visible(data)) {
// pick the visible one for store-to-load forwarding
result = data;
result_visible = true;
}
return true; // continue the following loops
};
for (auto stmt : reach_in) {
Expand All @@ -223,7 +235,7 @@ Stmt *CFGNode::get_store_forwarding_data(Stmt *var, int position) const {
block->statements[position]->id);
return nullptr;
}
if (!visible(result)) {
if (!result_visible) {
return nullptr;
}
return result;
Expand Down Expand Up @@ -264,6 +276,46 @@ bool CFGNode::store_to_load_forwarding(bool after_lower_access) {
i--; // to cancel i++ in the for loop
modified = true;
}
continue;
}
// Identical store elimination
if (auto local_store = stmt->cast<LocalStoreStmt>()) {
result = get_store_forwarding_data(local_store->ptr, i);
if (result) {
if (result->is<AllocaStmt>()) {
// special case of alloca (initialized to 0)
if (auto stored_data = local_store->data->cast<ConstStmt>()) {
bool all_zero = true;
for (auto &val : stored_data->val.data) {
if (!val.equal_value(0)) {
all_zero = false;
break;
}
}
if (all_zero) {
erase(i); // This causes end_location--
i--; // to cancel i++ in the for loop
modified = true;
}
}
} else {
// not alloca
if (irpass::analysis::same_statements(result, local_store->data)) {
erase(i); // This causes end_location--
i--; // to cancel i++ in the for loop
modified = true;
}
}
}
} else if (auto global_store = stmt->cast<GlobalStoreStmt>()) {
if (!after_lower_access) {
result = get_store_forwarding_data(global_store->ptr, i);
if (irpass::analysis::same_statements(result, global_store->data)) {
erase(i); // This causes end_location--
i--; // to cancel i++ in the for loop
modified = true;
}
}
}
}
return modified;
Expand Down Expand Up @@ -342,6 +394,7 @@ bool CFGNode::dead_store_elimination(bool after_lower_access) {
replace_with(i, std::move(local_load), true);
// Notice that we have a load here.
live_in_this_node.insert(atomic->dest);
killed_in_this_node.erase(atomic->dest);
modified = true;
continue;
} else if (!is_parallel_executed) {
Expand All @@ -354,6 +407,11 @@ bool CFGNode::dead_store_elimination(bool after_lower_access) {
replace_with(i, std::move(global_load), true);
// Notice that we have a load here.
live_in_this_node.insert(atomic->dest);
// Note: It's possible that a global pointer is not erased from
// killed_in_this_node although it should be. This may harm the
// performance of identical load elimination but it's faster than
// checking the contents one by one.
killed_in_this_node.erase(atomic->dest);
modified = true;
continue;
}
Expand All @@ -375,11 +433,55 @@ bool CFGNode::dead_store_elimination(bool after_lower_access) {
}
}
auto load_ptrs = irpass::analysis::get_load_pointers(stmt);
if (load_ptrs.size() == 1 && store_ptrs.empty() && stmt->width() == 1) {
// Identical load elimination
auto load_ptr = load_ptrs.front();
if (!after_lower_access ||
(load_ptr->is<AllocaStmt>() || load_ptr->is<StackAllocaStmt>())) {
// After lower_access, we only analyze local variables and stacks.
if (!may_contain_variable(killed_in_this_node, load_ptr) &&
contain_variable(live_in_this_node, load_ptr)) {
// Only perform identical load elimination within a CFGNode.
for (int j = i + 1; j < end_location; j++) {
auto next_load_ptrs =
irpass::analysis::get_load_pointers(block->statements[j].get());
bool found = false;
for (auto &next_load_ptr : next_load_ptrs) {
if (irpass::analysis::maybe_same_address(load_ptr,
next_load_ptr)) {
found = true;
break;
}
}
if (found) {
if (irpass::analysis::same_statements(
stmt, block->statements[j].get())) {
block->statements[j]->replace_with(stmt);
erase(j);
modified = true;
break;
} else {
TI_WARN("Identical load elimination failed.");
}
}
}
}
}
}
for (auto &load_ptr : load_ptrs) {
if (!after_lower_access ||
(load_ptr->is<AllocaStmt>() || load_ptr->is<StackAllocaStmt>())) {
// After lower_access, we only analyze local variables and stacks.
live_in_this_node.insert(load_ptr);
if (store_ptrs.empty()) {
// Only allow identical load elimination (i.e. allow this statement
// to be eliminated) if this statement doesn't store any data.
// Note: It's possible that a global pointer is not erased from
// killed_in_this_node although it should be. This may harm the
// performance of identical load elimination but it's faster than
// checking the contents one by one.
killed_in_this_node.erase(load_ptr);
}
}
}
}
Expand Down Expand Up @@ -484,12 +586,14 @@ void ControlFlowGraph::reaching_definition_analysis(bool after_lower_access) {
if (!after_lower_access) {
for (int i = 0; i < num_nodes; i++) {
for (int j = nodes[i]->begin_location; j < nodes[i]->end_location; j++) {
if (auto global_load =
nodes[i]->block->statements[j]->cast<GlobalLoadStmt>()) {
nodes[start_node]->reach_gen.insert(global_load->ptr);
auto stmt = nodes[i]->block->statements[j].get();
if (stmt->is<GlobalPtrStmt>() || stmt->is<ExternalPtrStmt>() ||
stmt->is<BlockLocalPtrStmt>() || stmt->is<ThreadLocalPtrStmt>() ||
stmt->is<GlobalTemporaryStmt>()) {
// TODO: unify them
// A global pointer that may contain some data before this kernel.
nodes[start_node]->reach_gen.insert(stmt);
}
// Since we only do store-to-load forwarding, we don't need to mark
// other global pointers' data source at the start node.
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions taichi/ir/control_flow_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,14 @@ class ControlFlowGraph {
void live_variable_analysis(bool after_lower_access);

void simplify_graph();

// This pass cannot eliminate container statements properly for now.
bool unreachable_code_elimination();

// Also performs identical store elimination.
bool store_to_load_forwarding(bool after_lower_access);

// Also performs identical load elimination.
bool dead_store_elimination(bool after_lower_access);
};

Expand Down
Loading