Skip to content

Commit

Permalink
Compile sync without sync reg (#1415)
Browse files Browse the repository at this point in the history
* added new compile-sync pass without sync reg

* fixed format

* made compile-sync-without-sync-reg work

* formatting

* merge conflicts

* fmt

* try to fix clippy

* fixed test cases

* comments

* addressed comments and added new test cases for 3 threads

* fmt

* fixed test cases

* commit

* merge conflict

* fmt

* fixed test cases
paili0628 authored Apr 29, 2023
1 parent 71498ec commit e074e83
Showing 13 changed files with 491 additions and 13 deletions.
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -18,5 +18,8 @@
"python.analysis.typeCheckingMode": "basic",
"python.analysis.inlayHints.variableTypes": true,
"python.linting.flake8Enabled": true,
"jupyter.interactiveWindow.codeLens.enable": true
"jupyter.interactiveWindow.codeLens.enable": true,
"rust-analyzer.linkedProjects": [
"./calyx-opt/Cargo.toml"
]
}
17 changes: 9 additions & 8 deletions calyx-opt/src/default_passes.rs
Original file line number Diff line number Diff line change
@@ -2,13 +2,13 @@
use crate::passes::{
Canonicalize, CellShare, ClkInsertion, CollapseControl, CombProp,
CompileEmpty, CompileInvoke, CompileRef, CompileStatic, CompileSync,
ComponentInliner, DeadAssignmentRemoval, DeadCellRemoval, DeadGroupRemoval,
Externalize, GoInsertion, GroupToInvoke, GroupToSeq, HoleInliner,
InferShare, InferStaticTiming, LowerGuards, MergeAssign, MergeStaticPar,
Papercut, ParToSeq, RegisterUnsharing, RemoveIds, ResetInsertion,
SimplifyWithControl, StaticInliner, StaticParConv, SynthesisPapercut,
TopDownCompileControl, TopDownStaticTiming, UnrollBounded, WellFormed,
WireInliner,
CompileSyncWithoutSyncReg, ComponentInliner, DeadAssignmentRemoval,
DeadCellRemoval, DeadGroupRemoval, Externalize, GoInsertion, GroupToInvoke,
GroupToSeq, HoleInliner, InferShare, InferStaticTiming, LowerGuards,
MergeAssign, MergeStaticPar, Papercut, ParToSeq, RegisterUnsharing,
RemoveIds, ResetInsertion, SimplifyWithControl, StaticInliner,
StaticParConv, SynthesisPapercut, TopDownCompileControl,
TopDownStaticTiming, UnrollBounded, WellFormed, WireInliner,
};
use crate::traversal::Named;
use crate::{pass_manager::PassManager, register_alias};
@@ -48,6 +48,7 @@ impl PassManager {
pm.register_pass::<TopDownCompileControl>()?;
pm.register_pass::<CompileRef>()?;
pm.register_pass::<CompileSync>()?;
pm.register_pass::<CompileSyncWithoutSyncReg>()?;

// Lowering passes
pm.register_pass::<GoInsertion>()?;
@@ -75,7 +76,7 @@ impl PassManager {
pm,
"pre-opt",
[
CompileSync,
CompileSyncWithoutSyncReg,
GroupToSeq,
DeadAssignmentRemoval,
GroupToInvoke, // Creates Dead Groups potentially
5 changes: 3 additions & 2 deletions calyx-opt/src/passes/mod.rs
Original file line number Diff line number Diff line change
@@ -8,7 +8,6 @@ mod compile_empty;
mod compile_invoke;
mod compile_ref;
mod compile_static;
mod compile_sync;
mod component_iniliner;
mod dead_assignment_removal;
mod dead_cell_removal;
@@ -31,6 +30,7 @@ mod register_unsharing;
mod remove_ids;
mod reset_insertion;
mod static_inliner;
mod sync;
// mod simplify_guards;
mod simplify_with_control;
mod static_par_conv;
@@ -50,7 +50,6 @@ pub use compile_empty::CompileEmpty;
pub use compile_invoke::CompileInvoke;
pub use compile_ref::CompileRef;
pub use compile_static::CompileStatic;
pub use compile_sync::CompileSync;
pub use component_iniliner::ComponentInliner;
pub use dead_assignment_removal::DeadAssignmentRemoval;
pub use dead_cell_removal::DeadCellRemoval;
@@ -72,6 +71,8 @@ pub use remove_ids::RemoveIds;
pub use reset_insertion::ResetInsertion;
pub use simplify_with_control::SimplifyWithControl;
pub use static_inliner::StaticInliner;
pub use sync::CompileSync;
pub use sync::CompileSyncWithoutSyncReg;
// pub use simplify_guards::SimplifyGuards;
pub use static_par_conv::StaticParConv;
pub use synthesis_papercut::SynthesisPapercut;
Original file line number Diff line number Diff line change
@@ -46,6 +46,7 @@ impl Named for CompileSync {
}
}

/// put into the count set the barrier indices appearing in the thread
fn count_barriers(
s: &ir::Control,
count: &mut HashSet<u64>,
221 changes: 221 additions & 0 deletions calyx-opt/src/passes/sync/compile_sync_without_sync_reg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
use crate::traversal::{Action, Named, VisResult, Visitor};
use calyx_ir::Guard;
use calyx_ir::Nothing;
use calyx_ir::{self as ir, GetAttributes, RRC};
use calyx_ir::{build_assignments, guard, structure};
use calyx_utils::{CalyxResult, Error};
use std::collections::HashMap;

/// Compiles `@sync` barriers without using the `std_sync_reg` primitive.
///
/// For every thread of a `par` that reaches barrier `n` (an empty control
/// statement annotated with `@sync(n)`), the pass instantiates a `std_reg(1)`
/// named `bar` for that thread, and a single `std_wire(1)` named `s` that is
/// shared by all threads of the barrier.
///
/// A continuous assignment drives `s.in` with `1'd1`, guarded by the
/// conjunction of the `bar.out` ports of every thread that takes part in the
/// barrier.
///
/// Each `@sync` control statement is then replaced with
/// ```text
/// seq {
///   barrier;
///   clear;
/// }
/// ```
/// * `barrier` writes `1'd1` into the thread's `bar` and is done once `s.out`
///   is high, i.e. once every participating thread has arrived.
/// * `clear` writes `1'd0` back into `bar` so the barrier can be reused.
///
/// Per the original design note, each thread incurs only 3 cycles of latency
/// overhead per barrier, and there is in principle no limit on the number of
/// threads under one barrier.
#[derive(Default)]
pub struct CompileSyncWithoutSyncReg;

impl Named for CompileSyncWithoutSyncReg {
/// Identifier used to select this pass (the pass tests run it with
/// `-p compile-sync-without-sync-reg`).
fn name() -> &'static str {
"compile-sync-without-sync-reg"
}

/// One-line, human-readable summary of what the pass does.
fn description() -> &'static str {
"Implement barriers for statements marked with @sync attribute without std_sync_reg"
}
}

/// Bookkeeping for the barriers of one `par`, keyed by barrier index.
///
/// Each entry holds the shared `s` cell of the barrier (a `std_wire(1)`, not a
/// register) together with the guard accumulated so far for the assignment to
/// `s.in`; every participating thread ANDs its own `bar.out` into that guard.
#[derive(Default)]
struct BarrierMap(HashMap<u64, (RRC<ir::Cell>, Box<ir::Guard<ir::Nothing>>)>);

impl BarrierMap {
fn get_mut(
&mut self,
idx: &u64,
) -> Option<&mut (RRC<calyx_ir::Cell>, Box<Guard<Nothing>>)> {
self.0.get_mut(idx)
}

fn new() -> Self {
BarrierMap(HashMap::new())
}

fn get_reg(&mut self, idx: &u64) -> &mut RRC<ir::Cell> {
let (cell, _) = self.get_mut(idx).unwrap();
cell
}

fn get_guard(&mut self, idx: &u64) -> &mut Box<ir::Guard<ir::Nothing>> {
let (_, gd) = self.get_mut(idx).unwrap();
gd
}

fn insert_shared_wire(&mut self, builder: &mut ir::Builder, idx: &u64) {
if self.0.get(idx).is_none() {
structure!(builder;
let s = prim std_wire(1);
);
let gd = ir::Guard::True;
self.0.insert(*idx, (s, Box::new(gd)));
}
}
}

// Instantiates the per-thread hardware for one barrier and the two groups
// that implement it, `barrier` and `clear`, and returns the control program
// `seq { barrier; clear; }` that replaces the `@sync` statement.
//
// `barrier_idx` must already be registered in `barrier_reg` (via
// `insert_shared_wire`); otherwise the lookups below panic.
//
// NOTE(review): the local binding names (`bar`, `z`, `constant`, `group`,
// `clear`, `s`) are consumed by the `structure!` / `build_assignments!`
// macros, and the generated cells in the pass test are called `bar`, `bar0`.
// Presumably the cell name is derived from the binding name, so do not rename
// them without checking the golden outputs.
fn build_barrier_group(
builder: &mut ir::Builder,
barrier_idx: &u64,
barrier_reg: &mut BarrierMap,
) -> ir::Control {
let group = builder.add_group("barrier");
// `bar` is this thread's 1-bit "arrived" flag; `z` and `constant` are the
// 1-bit constants 0 and 1.
structure!(
builder;
let bar = prim std_reg(1);
let z = constant(0, 1);
let constant = constant(1, 1);
);

// Add this thread's `bar.out` to the conjunction guarding `s.in`, so the
// shared wire only goes high once this thread has arrived as well.
barrier_reg
.get_guard(barrier_idx)
.update(|g| g.and(guard!(bar["out"])));

// Shared wire of the barrier (a `std_wire(1)` despite the accessor's name).
let s = barrier_reg.get_reg(barrier_idx);

// `barrier`: raise `bar` and finish when the shared wire `s` is high.
let assigns = build_assignments!(builder;
bar["in"] = ? constant["out"];
bar["write_en"] = ? constant["out"];
group["done"] = ? s["out"];
);
group.borrow_mut().assignments.extend(assigns);

// `clear`: write 0 back into `bar` so the barrier can be used again; done
// when the register write completes.
let clear = builder.add_group("clear");
let clear_assigns = build_assignments!(builder;
bar["in"] = ? z["out"];
bar["write_en"] = ? constant["out"];
clear["done"] = ? bar["done"];);
clear.borrow_mut().assignments.extend(clear_assigns);

let stmts = vec![ir::Control::enable(group), ir::Control::enable(clear)];

ir::Control::seq(stmts)
}

// produces error if `invoke` or `enable` are marked with @sync
fn produce_err(con: &ir::Control) -> CalyxResult<()> {
match con {
ir::Control::Enable(e) => {
if con.get_attributes().get("sync").is_some() {
return Err(Error::malformed_control(
"Enable or Invoke controls cannot be marked with @sync"
.to_string(),
)
.with_pos(e.get_attributes()));
}
Ok(())
}
ir::Control::Invoke(i) => {
if con.get_attributes().get("sync").is_some() {
return Err(Error::malformed_control(
"Enable or Invoke controls cannot be marked with @sync"
.to_string(),
)
.with_pos(&i.attributes));
}
Ok(())
}
_ => Ok(()),
}
}

/// Walks one thread of a `par` and replaces every empty control statement
/// annotated with `@sync(n)` by a copy of that thread's `seq { barrier; clear; }`
/// program for barrier `n`.
///
/// * `barrier_reg` is shared by all threads of the `par` and holds the shared
///   wire and guard of each barrier.
/// * `barrier_con` belongs to the current thread and caches the control
///   program built for each barrier index, so a barrier that is reached
///   several times in one thread reuses the same groups.
///
/// Recurses into `seq`, `if` and `while`. Enables and invokes are only
/// checked for a misplaced `@sync`; every other control statement is left
/// untouched.
fn insert_barrier(
    builder: &mut ir::Builder,
    con: &mut ir::Control,
    barrier_reg: &mut BarrierMap,
    barrier_con: &mut HashMap<u64, ir::Control>,
) -> CalyxResult<()> {
    match con {
        ir::Control::Empty(_) => {
            if let Some(&idx) = con.get_attributes().get("sync") {
                // Make sure the shared wire exists before the group is built.
                barrier_reg.insert_shared_wire(builder, &idx);
                let template = barrier_con.entry(idx).or_insert_with(|| {
                    build_barrier_group(builder, &idx, barrier_reg)
                });
                // Overwrite the `@sync` placeholder with a fresh copy.
                *con = ir::Cloner::control(template);
            }
            Ok(())
        }
        ir::Control::Seq(seq) => seq.stmts.iter_mut().try_for_each(|stmt| {
            insert_barrier(builder, stmt, barrier_reg, barrier_con)
        }),
        ir::Control::If(branch) => {
            insert_barrier(
                builder,
                &mut branch.tbranch,
                barrier_reg,
                barrier_con,
            )?;
            insert_barrier(builder, &mut branch.fbranch, barrier_reg, barrier_con)
        }
        ir::Control::While(wh) => {
            insert_barrier(builder, &mut wh.body, barrier_reg, barrier_con)
        }
        ir::Control::Enable(_) | ir::Control::Invoke(_) => produce_err(con),
        _ => Ok(()),
    }
}
impl Visitor for CompileSyncWithoutSyncReg {
    /// Compiles all `@sync` barriers that appear in the threads of `s`.
    ///
    /// Every child statement of the `par` is treated as one thread: its
    /// `@sync` statements are replaced by `seq { barrier; clear; }`, and the
    /// thread's `bar.out` is added to the guard of each barrier it uses.
    /// Afterwards one continuous assignment `s.in = <guard> ? 1'd1` is added
    /// per barrier.
    ///
    /// # Errors
    /// Propagates the malformed-control error raised when an enable or invoke
    /// is annotated with `@sync`.
    fn finish_par(
        &mut self,
        s: &mut ir::Par,
        comp: &mut ir::Component,
        sigs: &ir::LibrarySignatures,
        _comps: &[ir::Component],
    ) -> VisResult {
        let mut builder = ir::Builder::new(comp, sigs);
        // Shared across all threads of this `par`.
        let mut barrier_reg: BarrierMap = BarrierMap::new();
        for stmt in s.stmts.iter_mut() {
            // Fresh per thread, so each thread gets its own `bar` register
            // and `barrier`/`clear` groups for every barrier it uses.
            let mut barrier_con: HashMap<u64, ir::Control> = HashMap::new();
            insert_barrier(
                &mut builder,
                stmt,
                &mut barrier_reg,
                &mut barrier_con,
            )?;
        }

        // `HashMap` iteration order is arbitrary and differs between runs.
        // Emit the continuous assignments in ascending barrier-index order so
        // the generated program is deterministic.
        let mut barriers: Vec<_> = barrier_reg.0.into_iter().collect();
        barriers.sort_unstable_by_key(|(idx, _)| *idx);

        // add continuous assignments for value of `s`
        for (_, (wire, g_box)) in barriers {
            // Created lazily inside the loop so that a `par` without any
            // barrier does not add a constant to the component.
            structure!( builder;
                let constant = constant(1,1);
            );
            let g = *g_box;
            let cont_assigns = build_assignments!(builder;
                wire["in"] = g ? constant["out"];
            );
            builder
                .component
                .continuous_assignments
                .extend(cont_assigns);
        }
        Ok(Action::Continue)
    }
}
5 changes: 5 additions & 0 deletions calyx-opt/src/passes/sync/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mod compile_sync;
mod compile_sync_without_sync_reg;

pub use compile_sync::CompileSync;
pub use compile_sync_without_sync_reg::CompileSyncWithoutSyncReg;
2 changes: 1 addition & 1 deletion examples/sync/sync-doc-example.expect
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"cycles": 179,
"cycles": 74,
"memories": {
"accm": [
15
2 changes: 1 addition & 1 deletion examples/sync/sync-if.expect
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"cycles": 205,
"cycles": 112,
"memories": {
"in_0": [
1,
5 changes: 5 additions & 0 deletions tests/correctness/sync/sync-three-threads.expect
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"out": [
36
]
}
107 changes: 107 additions & 0 deletions tests/correctness/sync/sync-three-threads.futil
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import "primitives/core.futil";

// Correctness test for barriers shared by three threads.
// Each loop iteration, threads A, B and C bump `val_1`, `val_2` and `val_3`
// by 1, 2 and 3, meet at barrier 1, then thread C adds their sum into `val`
// while thread B increments the shared loop counter, and all meet again at
// barrier 2. After the `par`, `val` is written to `out[0]`.
component main() -> () {
cells {
@external out = std_mem_d1(32, 1, 3);
val_1 = std_reg(32);
incr_1 = std_add(32);
val_2 = std_reg(32);
incr_2 = std_add(32);
val_3 = std_reg(32);
incr_3 = std_add(32);
// `loop` is the iteration counter shared by all three threads.
loop = std_reg(3);
a = std_add(3);
ls = std_lt(3);
// `val` accumulates val_1 + val_2 + val_3 once per iteration.
val = std_reg(32);
ac = std_add(32);
add_0 = std_add(32);
add_1 = std_add(32);
}

wires {

// val_1 += 1
group calc_val_1 {
incr_1.left = val_1.out;
incr_1.right = 32'd1;
val_1.in = incr_1.out;
val_1.write_en = 1'd1;
calc_val_1[done] = val_1.done;
}

// val_2 += 2
group calc_val_2 {
incr_2.left = val_2.out;
incr_2.right = 32'd2;
val_2.in = incr_2.out;
val_2.write_en = 1'd1;
calc_val_2[done] = val_2.done;
}

// val_3 += 3
group calc_val_3 {
incr_3.left = val_3.out;
incr_3.right = 32'd3;
val_3.in = incr_3.out;
val_3.write_en = 1'd1;
calc_val_3[done] = val_3.done;
}

// val += val_1 + val_2 + val_3
group accm {
add_0.left = val_1.out;
add_0.right = val_2.out;
add_1.left = add_0.out;
add_1.right = val_3.out;
ac.left = val.out;
ac.right = add_1.out;
val.in = ac.out;
val.write_en = 1'd1;
accm[done] = val.done;
}

// loop += 1
group incr_loop {
a.left = loop.out;
a.right = 3'd1;
loop.in = a.out;
loop.write_en = 1'd1;
incr_loop[done] = loop.done;
}

// out[0] = val
group reg_to_mem {
out.write_en = 1'd1;
out.write_data = val.out;
out.addr0 = 3'd0;
reg_to_mem[done] = out.done;
}

// Loop condition: loop < 3
comb group cond {
ls.left = loop.out;
ls.right = 3'd3;
}
}

control {
seq {
par {
// Barrier 1 separates the writes to val_1/val_2/val_3 from `accm`, which
// reads them. Barrier 2 separates `incr_loop` and `accm` from the next
// evaluation of the loop condition (presumably so that all threads see
// the same value of `loop` — confirm against the barrier semantics).
// thread A
while ls.out with cond {
calc_val_1;
@sync(1);
@sync(2);
}
// thread B
while ls.out with cond {
calc_val_2;
@sync(1);
incr_loop;
@sync(2);
}
// thread C
while ls.out with cond {
calc_val_3;
@sync(1);
accm;
@sync(2);
}
}
reg_to_mem;
}
}
}
10 changes: 10 additions & 0 deletions tests/correctness/sync/sync-three-threads.futil.data
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"out": {
"data": [0],
"format": {
"numeric_type": "bitnum",
"is_signed": false,
"width": 32
}
}
}
74 changes: 74 additions & 0 deletions tests/passes/compile-sync-without-sync-reg/sync-simple.expect
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import "primitives/core.futil";
import "primitives/sync.futil";
component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) {
cells {
out = std_mem_d1(32, 1, 3);
val = std_reg(32);
add_0 = std_add(32);
no_use = std_reg(32);
@generated s = std_wire(1);
@generated bar = std_reg(1);
@generated bar0 = std_reg(1);
}
wires {
group no_op {
no_use.write_en = 1'd1;
no_use.in = 32'd0;
no_op[done] = no_use.done;
}
group calc_val {
val.write_en = 1'd1;
add_0.right = 32'd1;
add_0.left = 32'd2;
val.in = add_0.out;
calc_val[done] = val.done;
}
group reg_to_mem {
out.addr0 = 3'd0;
out.write_data = val.out;
out.write_en = 1'd1;
reg_to_mem[done] = out.done;
}
group barrier {
bar.in = 1'd1;
bar.write_en = 1'd1;
barrier[done] = s.out;
}
group clear {
bar.in = 1'd0;
bar.write_en = 1'd1;
clear[done] = bar.done;
}
group barrier0 {
bar0.in = 1'd1;
bar0.write_en = 1'd1;
barrier0[done] = s.out;
}
group clear0 {
bar0.in = 1'd0;
bar0.write_en = 1'd1;
clear0[done] = bar0.done;
}
s.in = bar.out & bar0.out ? 1'd1;
}

control {
par {
seq {
no_op;
seq {
barrier;
clear;
}
reg_to_mem;
}
seq {
calc_val;
seq {
barrier0;
clear0;
}
}
}
}
}
50 changes: 50 additions & 0 deletions tests/passes/compile-sync-without-sync-reg/sync-simple.futil
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// -p validate -p compile-sync-without-sync-reg

import "primitives/core.futil";
import "primitives/sync.futil";

// Minimal input for the compile-sync-without-sync-reg pass test: two threads
// that share a single barrier, `@sync(1)`.
component main() -> () {
cells {
out = std_mem_d1(32, 1, 3);
val = std_reg(32);
add_0 = std_add(32);
no_use = std_reg(32);
}

wires {
// Writes 0 into an otherwise unused register.
group no_op {
no_use.in = 32'd0;
no_use.write_en = 1'd1;
no_op[done] = no_use.done;
}

// val = 2 + 1
group calc_val {
add_0.left = 32'd2;
add_0.right = 32'd1;
val.in = add_0.out;
val.write_en = 1'd1;
calc_val[done] = val.done;
}

// out[0] = val
group reg_to_mem {
out.write_en = 1'd1;
out.write_data = val.out;
out.addr0 = 3'd0;
reg_to_mem[done] = out.done;
}
}

control {
par {
// First thread: `reg_to_mem` reads `val` only after the barrier.
seq {
no_op;
@sync(1);
reg_to_mem;
}
// Second thread: writes `val` before reaching the barrier.
seq {
calc_val;
@sync(1);
}
}
}
}

0 comments on commit e074e83

Please sign in to comment.