From b624a6dc709590d7c407a41210cc925808a181e0 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 10:01:30 -0500 Subject: [PATCH 01/75] Use dahlia. --- frontends/relay-futil/README.md | 8 +- frontends/relay-futil/compiler.py | 144 +++++--- frontends/relay-futil/dahlia_functions.py | 125 +++++++ frontends/relay-futil/example.py | 40 +-- frontends/relay-futil/futil_ast.py | 54 ++- frontends/relay-futil/pretty_print.py | 3 + frontends/relay-futil/tests/add.expect | 126 ++++--- frontends/relay-futil/tests/data/add.expect | 3 - .../relay-futil/tests/data/add.relay.data | 6 +- .../tests/data/{let.expect => let1.expect} | 3 - .../tests/data/{let.relay => let1.relay} | 0 .../data/{let.relay.data => let1.relay.data} | 4 - frontends/relay-futil/tests/data/let2.expect | 5 +- frontends/relay-futil/tests/data/let2.relay | 2 +- .../relay-futil/tests/data/let2.relay.data | 6 +- frontends/relay-futil/tests/data/sub.expect | 3 - .../relay-futil/tests/data/sub.relay.data | 4 - .../tests/data/tensor2d_add.expect | 44 +++ .../relay-futil/tests/data/tensor2d_add.relay | 5 + .../tests/data/tensor2d_add.relay.data | 14 + .../tests/data/tensor3d_batch_flatten.expect | 18 + .../tests/data/tensor3d_batch_flatten.relay | 6 + .../data/tensor3d_batch_flatten.relay.data | 10 + frontends/relay-futil/tests/let1.expect | 134 ++++--- frontends/relay-futil/tests/let2.expect | 233 ++++++++----- frontends/relay-futil/tests/let3.expect | 330 ++++++++++++------ frontends/relay-futil/tests/sub.expect | 126 ++++--- .../relay-futil/tests/tensor2d_add.expect | 135 +++++++ .../relay-futil/tests/tensor2d_add.relay | 6 + .../tests/tensor3d_batch_flatten.expect | 166 +++++++++ .../tests/tensor3d_batch_flatten.relay | 6 + frontends/relay-futil/utilities.py | 251 +++---------- 32 files changed, 1349 insertions(+), 671 deletions(-) create mode 100644 frontends/relay-futil/dahlia_functions.py rename frontends/relay-futil/tests/data/{let.expect => let1.expect} (78%) rename 
frontends/relay-futil/tests/data/{let.relay => let1.relay} (100%) rename frontends/relay-futil/tests/data/{let.relay.data => let1.relay.data} (81%) create mode 100644 frontends/relay-futil/tests/data/tensor2d_add.expect create mode 100644 frontends/relay-futil/tests/data/tensor2d_add.relay create mode 100644 frontends/relay-futil/tests/data/tensor2d_add.relay.data create mode 100644 frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect create mode 100644 frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay create mode 100644 frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data create mode 100644 frontends/relay-futil/tests/tensor2d_add.expect create mode 100644 frontends/relay-futil/tests/tensor2d_add.relay create mode 100644 frontends/relay-futil/tests/tensor3d_batch_flatten.expect create mode 100644 frontends/relay-futil/tests/tensor3d_batch_flatten.relay diff --git a/frontends/relay-futil/README.md b/frontends/relay-futil/README.md index 427909c036..0136ec5436 100644 --- a/frontends/relay-futil/README.md +++ b/frontends/relay-futil/README.md @@ -42,12 +42,10 @@ Run an Example Try this to run a simple example: ```bash cd futil/frontends/relay-futil -python3 example.py +python3 example.py add ``` - -Pass the `-r` flag to this script to see the Relay code. Otherwise, we just print the FuTIL code. There is also an `-o` flag to try optimizing the Relay code a little bit. - -You can specify the name of an example as a command-line argument. Currently, the only option is `identity`. +Pass the `-h` flag to this script for help. +Pass the `-r` flag to this script to see the Relay IR. Otherwise, we just print the FuTIL output. 
Run the Tests diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 89b8991798..b10932c239 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -1,90 +1,139 @@ from tvm import relay, ir from tvm.relay.expr_functor import ExprFunctor from tvm.relay.function import Function -import textwrap -from collections import namedtuple, defaultdict -import math +from collections import defaultdict from pretty_print import * from utilities import * from futil_ast import * +from dahlia_functions import * -# Map standard Relay call to respective hardware name in FuTIL. -BuiltInBinaryCalls = {'add': 'add', 'equal': 'eq', 'multiply': 'mult', 'subtract': 'sub'} - -EmitResult = namedtuple('EmitResult', ['cells', 'groups']) +# Mapping from Relay binary calls to the respective Dahlia operator. +BuiltInBinaryCalls = {'add': '+', 'multiply': '*', 'subtract': '-'} class Relay2Futil(ExprFunctor): """The main compilation visitor.""" + def __init__(self): + super(Relay2Futil, self).__init__() + self.id_dictionary = defaultdict(int) + self.relay_id_dictionary = defaultdict(int) + self.dahlia_components = [] + self.main = FComponent(name="main", cells=[], wires=[]) + def id(self, name): """ - Provides unique identification for a given name. + Provides a unique identification for a given name. """ id_number = self.id_dictionary[name] self.id_dictionary[name] += 1 return name + str(id_number) - def __init__(self): - super(Relay2Futil, self).__init__() - self.id_dictionary = defaultdict(int) - self.main = FComponent(name="main", cells=[], wires=[]) + def relay_id(self, name): + """ + Relay does not explicitly differentiate a variable name if it is used twice. For example, + %x = foo(%y); + %x1 = bar(%x); // Here, at this level, the name_hint associated with `x1` is still 'x'. + To avoid this, we provide Relay with its own identification dictionary. If 'x' is seen + three times, it will produce: 'x', 'x1', 'x2'.
+ """ + id_number = self.relay_id_dictionary[name] + self.relay_id_dictionary[name] += 1 + if id_number == 0: return name + return name + str(id_number) + + def produce_dahlia_name(self, name, type): + """ + Dahlia uses the following naming scheme for an arbitrary variable 'X': + Memory1D: 'X0', 'X1', 'X2', ... + Memory2D: 'X0_0', 'X1_0', 'X2_0', ... + Memory3D: 'X0_0_0', 'X1_0_0', 'X2_0_0', ... + """ + dahlia_name = self.id(name) + if type == PrimitiveType.Memory1D: return dahlia_name + if type == PrimitiveType.Memory2D: return dahlia_name + "_0" + if type == PrimitiveType.Memory3D: return dahlia_name + "_0_0" + assert False, f'{name} with {type} is not supported yet.' + + def get_dahlia_function_type(self, function_name, input_type): + """ + Returns the corresponding name, Dahlia function type, and op (if it is a binary op, otherwise None). + If the function type isn't supported, fails with an assertion. + """ + op = None + if function_name in BuiltInBinaryCalls: + op = BuiltInBinaryCalls[function_name] + if input_type == PrimitiveType.Memory1D: + return self.relay_id(f'tensor1d_{function_name}'), DahliaFunctionType.Tensor1DBinaryOp, op + if input_type == PrimitiveType.Memory2D: + return self.relay_id(f'tensor2d_{function_name}'), DahliaFunctionType.Tensor2DBinaryOp, op + + if function_name == "nn.batch_flatten": + assert input_type == PrimitiveType.Memory3D, f'{input_type} not supported for batch flattening.' + return self.relay_id(f'tensor3d_batch_flatten'), DahliaFunctionType.Tensor3DBatchFlatten, op + + assert False, f'{function_name} with {input_type} is not supported.' 
def visit_var(self, var): - name = var.name_hint - type = str(var.type_annotation) - data = [get_bitwidth(type), 1, 1] # [width, size, index_size] - return [FCell(primitive=FPrimitive(name=name, data=data, type=PrimitiveType.Memory1D))] + name = self.relay_id(var.name_hint) + if self.main.contains_primitive(name): return [cell] + + data, type = get_memory_parameters(var.type_annotation) + dahlia_name = self.produce_dahlia_name(name, type) + return [FCell(dahlia_name=dahlia_name, primitive=FPrimitive(name=name, data=data, type=type))] def visit_let(self, let): - variable = self.visit(let.var)[0] + variable = self.visit(let.var) body = self.visit(let.body) values = self.visit(let.value) - for value in values: - if not value.is_declaration(): continue - value.declaration.intermediary_output = FCell( - primitive=FPrimitive(name=variable.primitive.name, data=variable.primitive.data, - type=PrimitiveType.Memory1D)) + output = variable[0] + for value in flatten(values): + if not value.is_dahlia_declaration(): continue + decl = value.dahlia_declaration + decl.output = output + # TODO(cgyurgyik): This shouldn't be necessary. To simplify, produce mapping + # between enum and corresponding function. + if decl.type == DahliaFunctionType.Tensor1DBinaryOp: + decl.program = tensor1d_op(decl) + elif decl.type == DahliaFunctionType.Tensor2DBinaryOp: + decl.program = tensor2d_op(decl) + elif decl.type == DahliaFunctionType.Tensor3DBatchFlatten: + decl.program = tensor3d_batch_flatten(decl) return [body, values] def visit_constant(self, const): type = const.data.dtype shape = const.data.shape - data = [get_bitwidth(type), int(const.data.asnumpy())] # [width, value] + data = [get_bitwidth(type), int(const.data.asnumpy())] name = self.id("const") return [FCell(primitive=FPrimitive(name=name, data=data, type=PrimitiveType.Constant))] def visit_call(self, call): - assert call.op.name in BuiltInBinaryCalls, f'{call.op.name} not supported.' 
- op = BuiltInBinaryCalls[call.op.name] - + cells = [] args = [] - for arg in call.args: args.append(self.visit(arg)) - return [build_tensor_0D_binary_op(call, args, op)] + for arg in call.args: + result = self.visit(arg) + cells.append(result) + args.append(result) + cells = flatten(cells) + name, type, op = self.get_dahlia_function_type(call.op.name, cells[0].primitive.type) + dahlia_declaration = DahliaDeclaration(component_name=name, decl_name=self.id(name), op=op, + inputs=flatten(args), type=type) + cells.append(FCell(dahlia_declaration=dahlia_declaration)) + return cells def visit_function(self, function): - fn: FComponent = FComponent(name=self.id("function"), cells=[], wires=[], - signature=FSignature(inputs=[], outputs=[])) - fn.signature.inputs, fn.signature.outputs = extract_function_arguments(function.params) body = self.visit(function.body) - components = [fn] for cell in flatten(body): - if cell.is_declaration(): - fn.add_cell(cell) - components.append(cell.declaration.component) - elif cell.primitive.type == PrimitiveType.Constant: - # Include constants, but not function arguments. - fn.add_cell(cell) - - build_function_body(fn) # Groups, wires, connections. - - # Add declaration to main. - self.main.add_cell(FCell(declaration=FDeclaration(name=self.id("fn"), component=fn))) + self.main.add_cell(cell) + if not cell.is_dahlia_declaration(): continue + self.dahlia_components.append(cell.dahlia_declaration.program) - return '\n'.join(pp_component(c) for c in reversed(components)) + build_main(self.main) # Groups, wires, connections. 
+ return pp_component(self.main) def infer_type(expr: Function) -> Function: @@ -93,7 +142,6 @@ def infer_type(expr: Function) -> Function: to_normal_pass = relay.transform.ToANormalForm() mod = ir.IRModule() mod['main'] = expr - # mod = fuse_op__pass(mod) mod = infer_types_pass(mod) ret = mod['main'] return ret @@ -103,12 +151,12 @@ def compile(program) -> str: """Translate a Relay function to a FuTIL program (as a string).""" program = infer_type(program) visitor = Relay2Futil() - src = visitor.visit(program) - build_main_body(visitor.main) PREAMBLE = """import "primitives/std.lib";""" + MAIN = visitor.visit(program) + DAHLIA_COMPONENTS = '\n'.join(visitor.dahlia_components) NEWL = "\n\n" - return f'{PREAMBLE}{NEWL}{src}{NEWL}{pp_component(visitor.main)}' + return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}{NEWL}' if __name__ == '__main__': diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py new file mode 100644 index 0000000000..36873c3106 --- /dev/null +++ b/frontends/relay-futil/dahlia_functions.py @@ -0,0 +1,125 @@ +import subprocess + +from futil_ast import * + + +def lower_dahlia_program(prog, component_name): + ''' + Takes in a string that represents a Dahlia program, lowers it to FuTIL, and applies the `externalize` pass. + This is just for experimental purposes, and needs to be replaced. + More bluntly, this does the following: + 1. Copies dahlia program `prog` to a temporary file `temp.fuse`. + $ echo `program_string` > temp.fuse + + 2. Lowers `temp.fuse` to FuTIL with the name changed to `component_name`, and saves it in `lowered.futil`. + $ ./fuse temp.fuse --lower -b=futil -n=component_name > lowered.futil + + 3. Runs the 'externalize' pass on the `lowered.futil` file. + $ cargo run -- lowered.futil -p externalize > temp.futil + + 4. Copies the output from `temp.futil`, except for the first line (we don't want another copy of the import).
+ + TODO(cgyurgyik): As you'll see below, this only works on my local machine. + I've explicitly removed errors with `2>/dev/null` so they aren't inserted + to the file as well. However, this makes debugging difficult as well. + ''' + program_string = "\"" + for line in prog.splitlines(): + program_string += f'{line}\n' + program_string += "\"" + no_err = "2>/dev/null" + command = \ + f""" + echo {program_string} > temp.fuse && + /Users/cgyurgyik/Projects/dahlia/fuse temp.fuse --lower -b=futil -n={component_name} > lowered.futil {no_err} -l error && + cd ../../ && + cargo run -- frontends/relay-futil/lowered.futil -p externalize > frontends/relay-futil/temp.futil {no_err} && + cd frontends/relay-futil/ + """ + subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() + dahlia_component = open('temp.futil', 'r').read()[29:] # Skip over importing the primitives library. + subprocess.Popen("rm temp.fuse ; rm lowered.futil ; rm temp.futil", stdout=subprocess.PIPE, + shell=True).communicate() + return dahlia_component + + +def tensor1d_op(declaration): + op1 = declaration.inputs[0].primitive + op2 = declaration.inputs[1].primitive + res = declaration.output.primitive + + assert op1.type == PrimitiveType.Memory1D and op1.type == op2.type and op2.type == res.type + assert op1.data[0] == op2.data[0] and op1.data[0] == res.data[0] + assert op1.data[1] == op2.data[1] and op2.data[1] == res.data[1] + assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] + bitwidth = op1.data[0] + size = op1.data[1] + index_size = op1.data[2] + return lower_dahlia_program(f""" + decl {op1.name}: ubit<{bitwidth}>[{size}]; + decl {op2.name}: ubit<{bitwidth}>[{size}]; + decl {res.name}: ubit<{bitwidth}>[{size}]; + for (let i: ubit<{index_size}> = 0..{size}) {{ + {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; + }}""", declaration.component_name) + + +def tensor2d_op(declaration): + op1 = declaration.inputs[0].primitive + op2 = 
declaration.inputs[1].primitive + res = declaration.output.primitive + + assert op1.type == PrimitiveType.Memory2D and op1.type == op2.type and op2.type == res.type + assert op1.data[0] == op2.data[0] and op1.data[0] == res.data[0] + assert op1.data[1] == op2.data[1] and op2.data[1] == res.data[1] + assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] + assert op1.data[3] == op2.data[3] and op2.data[3] == res.data[3] + assert op1.data[4] == op2.data[4] and op2.data[4] == res.data[4] + + bitwidth = op1.data[0] + size0 = op1.data[1] + size1 = op1.data[2] + index_size0 = op1.data[3] + index_size1 = op1.data[4] + return lower_dahlia_program(f""" + decl {op1.name}: ubit<{bitwidth}>[{size0}][{size1}]; + decl {op2.name}: ubit<{bitwidth}>[{size0}][{size1}]; + decl {res.name}: ubit<{bitwidth}>[{size0}][{size1}]; + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + {res.name}[i][j] := {op1.name}[i][j] {declaration.op} {op2.name}[i][j]; + }} + }}""", declaration.component_name) + + +def tensor3d_batch_flatten(declaration): + op1 = declaration.inputs[0].primitive + res = declaration.output.primitive + + bitwidth = op1.data[0] + op1_size0 = op1.data[1] + op1_size1 = op1.data[2] + op1_size2 = op1.data[3] + op1_index_size0 = op1.data[4] + op1_index_size1 = op1.data[5] + op1_index_size2 = op1.data[6] + res_bitwidth = res.data[0] + res_size0 = res.data[1] + res_size1 = res.data[2] + res_index_size0 = res.data[3] + res_index_size1 = res.data[4] + + assert op1.type == PrimitiveType.Memory3D and res_size1 == op1_size1 * op1_size2 and res_size0 == op1_size0 + assert res.type == PrimitiveType.Memory2D and res_bitwidth == bitwidth + return lower_dahlia_program(f""" + decl {op1.name}: ubit<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; + decl {res.name}: ubit<{bitwidth}>[{res_size0}][{res_size1}]; + let l: ubit<{res_index_size1}> = 0; + for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ + for (let j: 
ubit<{op1_index_size1}> = 0..{op1_size1}) {{ + for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ + {res.name}[i][l] := {op1.name}[i][j][k]; + l := l + 1; + }} + }} + }}""", declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 6d51b5dba2..9d0c06d8f8 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -1,11 +1,10 @@ import tvm from tvm import relay -from tvm.relay import parser from compiler import * import sys -def tensor_0d_add(): +def add(): """Add together two variables in Relay. """ x = relay.var('x', shape=(), dtype="int32") @@ -13,15 +12,7 @@ def tensor_0d_add(): return relay.Function([x, y], relay.add(x, y)) -def tensor_1d_add(): - """Add together two 1-dimensional tensors in Relay. - """ - x = relay.var("x", relay.TensorType((1, 4), "int32")) - y = relay.var("y", relay.TensorType((1, 4), "int32")) - return relay.Function([x, y], relay.add(x, y)) - - -def tensor_2d_add(): +def tensor_add(): """Add together two 2-dimensional tensors in Relay. """ x = relay.var("x", relay.TensorType((2, 4), "int32")) @@ -29,13 +20,12 @@ def tensor_2d_add(): return relay.Function([x, y], relay.add(x, y)) -def assign(): - """Assign a const to a varible +def batch_flatten(): + """Flattens all dimensions except for the batch dimension. """ - x = relay.var('x', shape=()) - v1 = relay.log(x) - v2 = relay.add(v1, x) - return relay.Function([x], v2) + x = relay.var("x", relay.TensorType((2, 5, 5), "int32")) + return relay.Function([x], relay.nn.batch_flatten(x)) + def mlp_net(): """The MLP test from Relay. @@ -44,16 +34,27 @@ def mlp_net(): return mlp.get_net(1) -ALL_FUNCS = [tensor_0d_add, tensor_1d_add, tensor_2d_add, mlp_net] +ALL_FUNCS = [add, tensor_add, batch_flatten, mlp_net] +FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) def simple_example(): - func = tensor_0d_add() # Default if none provided. 
+ if '-h' in sys.argv[1:]: + supported_functions = [] + print("- To see FuTIL output:\n$ python3 example.py ") + print("- To see Relay IR:\n$ python3 example.py -r") + print("\n- Supported function names:") + for f in FUNC_NAMES: print(f' {f}') + return + func = None # See if the command line contains a function name. for option in ALL_FUNCS: if option.__name__ in sys.argv[1:]: func = option() break + if func == None: + print("For help:\n$ python3 example.py -h") + return # Try optimizing the Relay IR with a few built-in passes. seq = tvm.transform.Sequential([ @@ -65,7 +66,6 @@ def simple_example(): mod = tvm.IRModule.from_expr(func) mod_opt = seq(mod) func = mod_opt['main'] - if '-r' in sys.argv[1:]: # Dump the Relay representation (for educational purposes). print(func) diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 9b45774c16..dbdd21e666 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -1,7 +1,12 @@ from dataclasses import dataclass from typing import List, Dict from enum import Enum -import textwrap + + +class DahliaFunctionType(Enum): + Tensor1DBinaryOp = 1 + Tensor2DBinaryOp = 2 + Tensor3DBatchFlatten = 3 class PrimitiveType(Enum): @@ -10,7 +15,6 @@ class PrimitiveType(Enum): Memory1D = 3 Memory2D = 4 Memory3D = 5 - BinOp = 6 class ControlType(Enum): @@ -48,21 +52,6 @@ class FSignature: outputs: List[FPortDef] -# @dataclass -# class Atom: -# ''' -# Atomic operations used in guard conditions and RHS of the guarded assignments. -# ''' -# port: FPort -# num: int # TODO(cgyurgyik): This uses a Bitnum structure. - - -# @dataclass -# class FGuard: -# guard_expression: str -# atom: Atom - - @dataclass class FWire: src: str # FGuard @@ -128,14 +117,38 @@ class FComponent: controls: FControl = None # Control statement for this component. signature: FSignature = None # Input and output ports. 
+ def contains_primitive(self, name: str): + ''' + Determines whether this component contains a primitive with the given name. + ''' + # TODO(cgyurgyik): Rethink data structure here. + for cell in self.cells: + if not cell.is_primitive(): continue + if cell.primitive.name == name: return True + return False + def add_cell(self, subcomponent: Cell): ''' Appends a subcomponent to this component's list of FuTIL cells. ''' - # TODO(cgyurgyik): If its already contained here, don't re-add it. + if not subcomponent.is_primitive(): + self.cells.append(subcomponent) + return + if self.contains_primitive(subcomponent.primitive.name): return self.cells.append(subcomponent) +@dataclass +class DahliaDeclaration: + decl_name: str + component_name: str + type: DahliaFunctionType + op: str = None + program: str = None + inputs: List[Cell] = None + output: Cell = None + + @dataclass class FDeclaration: ''' @@ -149,11 +162,16 @@ class FDeclaration: @dataclass class FCell(Cell): + dahlia_name: str = None primitive: FPrimitive = None declaration: FDeclaration = None + dahlia_declaration: DahliaDeclaration = None def is_primitive(self): return self.primitive != None def is_declaration(self): return self.declaration != None + + def is_dahlia_declaration(self): + return self.dahlia_declaration != None diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 3b4c5481bb..6b8cbe4632 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -1,4 +1,5 @@ from futil_ast import * +import textwrap def mk_block(decl, contents, indent=2): @@ -107,3 +108,5 @@ def pp_cell(cell: FCell): assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' elif cell.is_declaration(): return f'{cell.declaration.name} = {cell.declaration.component.name};' + elif cell.is_dahlia_declaration(): + return f'{cell.dahlia_declaration.decl_name} = {cell.dahlia_declaration.component_name};' diff --git 
a/frontends/relay-futil/tests/add.expect b/frontends/relay-futil/tests/add.expect index c153759a33..c02365332b 100644 --- a/frontends/relay-futil/tests/add.expect +++ b/frontends/relay-futil/tests/add.expect @@ -1,84 +1,100 @@ import "primitives/std.lib"; -component add (x_out: 32, y_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { - add = prim std_add(32); - c0 = prim std_const(1, 0); + add0 = prim std_add(32); + add1 = prim std_add(1); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); } wires { - group process_add { - in_addr0 = c0.out; - add.left = x_out; - add.right = y_out; - in_write_en = 1'd1; - in_write_data = add.out; - process_add[done] = in_done ? 1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; } - } - control { - seq { - process_add; + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; } - } -} -component function0 (x_out: 32, y_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { - cells { - add_fn = add; - z = prim std_mem_d1(32, 1, 1); - c0 = prim std_const(1, 0); - } - wires { - group run_add_fn { - add_fn.x_out = x_out; - add_fn.y_out = y_out; - z.write_data = add_fn.in_write_data; - z.write_en = add_fn.in_write_en; - z.addr0 = add_fn.in_addr0; - add_fn.in_done = z.done; - add_fn.go = 1'd1; - run_add_fn[done] = add_fn.done ? 
1'd1; + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_read_data; + upd1[done] = y_read0_0.done ? 1'd1; } - group save_return_value { - z.addr0 = c0.out; - in_addr0 = c0.out; - in_write_en = 1'd1; - in_write_data = z.read_data; - save_return_value[done] = in_done ? 1'd1; + group upd2<"static"=1> { + z0_addr0 = i0.out; + z0_write_en = 1'd1; + add0.left = x_read0_0.out; + add0.right = y_read0_0.out; + z0_write_data = 1'd1 ? add0.out; + upd2[done] = z0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const2.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; } } + control { seq { - run_add_fn; - save_return_value; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } } } } component main () -> () { cells { - fn0 = function0; - c0 = prim std_const(1, 0); - main_ret = prim std_mem_d1(32, 1, 1); + z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); + tensor1d_add0 = tensor1d_add; } wires { - group run_fn0 { - fn0.x_out = x.read_data; - x.addr0 = fn0.in_addr0; - fn0.y_out = y.read_data; - y.addr0 = fn0.in_addr0; - main_ret.addr0 = fn0.in_addr0; - main_ret.write_data = fn0.in_write_data; - main_ret.write_en = fn0.in_write_en; - fn0.in_done = main_ret.done; - fn0.go = 1'd1; - run_fn0[done] = fn0.done ? 1'd1; + group run_tensor1d_add { + x.addr0 = tensor1d_add0.x0_addr0; + tensor1d_add0.x0_read_data = x.read_data; + y.addr0 = tensor1d_add0.y0_addr0; + tensor1d_add0.y0_read_data = y.read_data; + z.addr0 = tensor1d_add0.z0_addr0; + z.write_data = tensor1d_add0.z0_write_data; + z.write_en = tensor1d_add0.z0_write_en; + tensor1d_add0.z0_done = z.done; + tensor1d_add0.go = 1'd1; + run_tensor1d_add[done] = tensor1d_add0.done ? 
1'd1; } } control { seq { - run_fn0; + run_tensor1d_add; } } } + + diff --git a/frontends/relay-futil/tests/data/add.expect b/frontends/relay-futil/tests/data/add.expect index 64fea78c3e..8e6eaee89f 100644 --- a/frontends/relay-futil/tests/data/add.expect +++ b/frontends/relay-futil/tests/data/add.expect @@ -1,7 +1,4 @@ { - "main_ret": [ - 49 - ], "x": [ 42 ], diff --git a/frontends/relay-futil/tests/data/add.relay.data b/frontends/relay-futil/tests/data/add.relay.data index 2ad0db6bf5..2a8177b129 100644 --- a/frontends/relay-futil/tests/data/add.relay.data +++ b/frontends/relay-futil/tests/data/add.relay.data @@ -10,9 +10,5 @@ "z": { "data": [0], "bitwidth": 32 - }, - "main_ret": { - "data": [0], - "bitwidth": 32 - } + } } \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/let.expect b/frontends/relay-futil/tests/data/let1.expect similarity index 78% rename from frontends/relay-futil/tests/data/let.expect rename to frontends/relay-futil/tests/data/let1.expect index 4a8f9a7636..91c8cc0380 100644 --- a/frontends/relay-futil/tests/data/let.expect +++ b/frontends/relay-futil/tests/data/let1.expect @@ -13,8 +13,5 @@ ], "e": [ 250 - ], - "main_ret": [ - 250 ] } diff --git a/frontends/relay-futil/tests/data/let.relay b/frontends/relay-futil/tests/data/let1.relay similarity index 100% rename from frontends/relay-futil/tests/data/let.relay rename to frontends/relay-futil/tests/data/let1.relay diff --git a/frontends/relay-futil/tests/data/let.relay.data b/frontends/relay-futil/tests/data/let1.relay.data similarity index 81% rename from frontends/relay-futil/tests/data/let.relay.data rename to frontends/relay-futil/tests/data/let1.relay.data index 403a89d412..3fc10b83aa 100644 --- a/frontends/relay-futil/tests/data/let.relay.data +++ b/frontends/relay-futil/tests/data/let1.relay.data @@ -7,10 +7,6 @@ "data": [5], "bitwidth": 32 }, - "main_ret": { - "data": [0], - "bitwidth": 32 - }, "c": { "data":[3], "bitwidth": 32 diff --git 
a/frontends/relay-futil/tests/data/let2.expect b/frontends/relay-futil/tests/data/let2.expect index 8c1e6980e2..a4c655ebe8 100644 --- a/frontends/relay-futil/tests/data/let2.expect +++ b/frontends/relay-futil/tests/data/let2.expect @@ -9,9 +9,6 @@ 12 ], "d": [ - 15 - ], - "main_ret": [ - 15 + 36 ] } diff --git a/frontends/relay-futil/tests/data/let2.relay b/frontends/relay-futil/tests/data/let2.relay index bcf94635a6..76db1c9722 100644 --- a/frontends/relay-futil/tests/data/let2.relay +++ b/frontends/relay-futil/tests/data/let2.relay @@ -1,6 +1,6 @@ v0.0.4 fn (%a: int32, %b: int32) { let %c = multiply(%a, %b); - let %d = add(%c, %a); + let %d = multiply(%c, %a); %d } \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/let2.relay.data b/frontends/relay-futil/tests/data/let2.relay.data index f3450e4b2c..128ed0c61d 100644 --- a/frontends/relay-futil/tests/data/let2.relay.data +++ b/frontends/relay-futil/tests/data/let2.relay.data @@ -7,12 +7,8 @@ "data": [4], "bitwidth": 32 }, - "main_ret": { - "data": [0], - "bitwidth": 32 - }, "c": { - "data":[3], + "data":[0], "bitwidth": 32 }, "d": { diff --git a/frontends/relay-futil/tests/data/sub.expect b/frontends/relay-futil/tests/data/sub.expect index 6dfd9d1980..e313c7824d 100644 --- a/frontends/relay-futil/tests/data/sub.expect +++ b/frontends/relay-futil/tests/data/sub.expect @@ -7,8 +7,5 @@ ], "c": [ 42 - ], - "main_ret": [ - 42 ] } diff --git a/frontends/relay-futil/tests/data/sub.relay.data b/frontends/relay-futil/tests/data/sub.relay.data index e008769748..219d0fa867 100644 --- a/frontends/relay-futil/tests/data/sub.relay.data +++ b/frontends/relay-futil/tests/data/sub.relay.data @@ -7,10 +7,6 @@ "data": [7], "bitwidth": 32 }, - "main_ret": { - "data": [0], - "bitwidth": 32 - }, "c": { "data": [0], "bitwidth": 32 diff --git a/frontends/relay-futil/tests/data/tensor2d_add.expect b/frontends/relay-futil/tests/data/tensor2d_add.expect new file mode 100644 index 0000000000..84e8e0c77c --- /dev/null 
+++ b/frontends/relay-futil/tests/data/tensor2d_add.expect @@ -0,0 +1,44 @@ +{ + "x": [ + [ + 1, + 2, + 3, + 4 + ], + [ + 2, + 4, + 6, + 8 + ] + ], + "x1": [ + [ + 42, + 42, + 42, + 42 + ], + [ + 42, + 42, + 42, + 42 + ] + ], + "y": [ + [ + 41, + 40, + 39, + 38 + ], + [ + 40, + 38, + 36, + 34 + ] + ] +} diff --git a/frontends/relay-futil/tests/data/tensor2d_add.relay b/frontends/relay-futil/tests/data/tensor2d_add.relay new file mode 100644 index 0000000000..4db9fcb099 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor2d_add.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(2, 4), int32], %y: Tensor[(2, 4), int32]) { + let %x1 = add(%x, %y); + %x1 +} diff --git a/frontends/relay-futil/tests/data/tensor2d_add.relay.data b/frontends/relay-futil/tests/data/tensor2d_add.relay.data new file mode 100644 index 0000000000..0bf859a4c7 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor2d_add.relay.data @@ -0,0 +1,14 @@ +{ + "x": { + "data": [[1, 2, 3, 4], [2, 4, 6, 8]], + "bitwidth": 32 + }, + "y": { + "data": [[41, 40, 39, 38], [40, 38, 36, 34]], + "bitwidth": 32 + }, + "x1": { + "data": [[0, 0, 0, 0], [0, 0, 0, 0]], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect new file mode 100644 index 0000000000..4d55d4d415 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect @@ -0,0 +1,18 @@ +{ + "x": [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ], + "x1": [ + 1, + 2, + 3, + 4 + ] +} diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay new file mode 100644 index 0000000000..2a5e223fec --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(1, 2, 2), int32]) -> Tensor[(1, 4), int32] { + let %x1: Tensor[(1, 4), int32] = nn.batch_flatten(%x); + %x1 +} + 
diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data new file mode 100644 index 0000000000..b6c5eae239 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data @@ -0,0 +1,10 @@ +{ + "x": { + "data": [[1, 2], [3, 4]], + "bitwidth": 32 + }, + "x1": { + "data": [1, 2, 3, 4], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/let1.expect b/frontends/relay-futil/tests/let1.expect index 1652474d30..4e82ded37e 100644 --- a/frontends/relay-futil/tests/let1.expect +++ b/frontends/relay-futil/tests/let1.expect @@ -1,84 +1,108 @@ import "primitives/std.lib"; -component mult (a_out: 32, b_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { - mult = prim std_mult(32); - c0 = prim std_const(1, 0); + a_read0_0 = prim std_reg(32); + add0 = prim std_add(1); + b_read0_0 = prim std_reg(32); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); + mult_pipe0 = prim std_mult_pipe(32); } wires { - group process_mult { - in_addr0 = c0.out; - mult.left = a_out; - mult.right = b_out; - in_write_en = 1'd1; - in_write_data = mult.out; - process_mult[done] = in_done ? 
1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; } - } - control { - seq { - process_mult; + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; } - } -} -component function0 (a_out: 32, b_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { - cells { - mult_fn = mult; - z = prim std_mem_d1(32, 1, 1); - c0 = prim std_const(1, 0); - } - wires { - group run_mult_fn { - mult_fn.a_out = a_out; - mult_fn.b_out = b_out; - z.write_data = mult_fn.in_write_data; - z.write_en = mult_fn.in_write_en; - z.addr0 = mult_fn.in_addr0; - mult_fn.in_done = z.done; - mult_fn.go = 1'd1; - run_mult_fn[done] = mult_fn.done ? 1'd1; + group let1<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let1[done] = bin_read0_0.done; + mult_pipe0.left = a_read0_0.out; + mult_pipe0.right = b_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group upd0<"static"=1> { + a_read0_0.write_en = 1'd1; + a0_addr0 = i0.out; + a_read0_0.in = 1'd1 ? a0_read_data; + upd0[done] = a_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + b_read0_0.write_en = 1'd1; + b0_addr0 = i0.out; + b_read0_0.in = 1'd1 ? b0_read_data; + upd1[done] = b_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + z0_addr0 = i0.out; + z0_write_en = 1'd1; + z0_write_data = 1'd1 ? bin_read0_0.out; + upd2[done] = z0_done ? 1'd1; } - group save_return_value { - z.addr0 = c0.out; - in_addr0 = c0.out; - in_write_en = 1'd1; - in_write_data = z.read_data; - save_return_value[done] = in_done ? 1'd1; + group upd3<"static"=1> { + i0.write_en = 1'd1; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; + upd3[done] = i0.done ? 
1'd1; } } + control { seq { - run_mult_fn; - save_return_value; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + let1; + upd2; + upd3; + } + } } } } component main () -> () { cells { - fn0 = function0; - c0 = prim std_const(1, 0); - main_ret = prim std_mem_d1(32, 1, 1); + z = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); b = prim std_mem_d1(32, 1, 1); + tensor1d_multiply0 = tensor1d_multiply; } wires { - group run_fn0 { - fn0.a_out = a.read_data; - a.addr0 = fn0.in_addr0; - fn0.b_out = b.read_data; - b.addr0 = fn0.in_addr0; - main_ret.addr0 = fn0.in_addr0; - main_ret.write_data = fn0.in_write_data; - main_ret.write_en = fn0.in_write_en; - fn0.in_done = main_ret.done; - fn0.go = 1'd1; - run_fn0[done] = fn0.done ? 1'd1; + group run_tensor1d_multiply { + a.addr0 = tensor1d_multiply0.a0_addr0; + tensor1d_multiply0.a0_read_data = a.read_data; + b.addr0 = tensor1d_multiply0.b0_addr0; + tensor1d_multiply0.b0_read_data = b.read_data; + z.addr0 = tensor1d_multiply0.z0_addr0; + z.write_data = tensor1d_multiply0.z0_write_data; + z.write_en = tensor1d_multiply0.z0_write_en; + tensor1d_multiply0.z0_done = z.done; + tensor1d_multiply0.go = 1'd1; + run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 
1'd1; } } control { seq { - run_fn0; + run_tensor1d_multiply; } } } + + diff --git a/frontends/relay-futil/tests/let2.expect b/frontends/relay-futil/tests/let2.expect index e5b099e95f..903681d9a8 100644 --- a/frontends/relay-futil/tests/let2.expect +++ b/frontends/relay-futil/tests/let2.expect @@ -1,118 +1,191 @@ import "primitives/std.lib"; -component mult (a_out: 32, b_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { - mult = prim std_mult(32); - c0 = prim std_const(1, 0); + a_read0_0 = prim std_reg(32); + add0 = prim std_add(32); + add1 = prim std_add(1); + c_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); } wires { - group process_mult { - in_addr0 = c0.out; - mult.left = a_out; - mult.right = b_out; - in_write_en = 1'd1; - in_write_data = mult.out; - process_mult[done] = in_done ? 1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; } - } - control { - seq { - process_mult; + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; } - } -} -component add (c_out: 32, a_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { - cells { - add = prim std_add(32); - c0 = prim std_const(1, 0); - } - wires { - group process_add { - in_addr0 = c0.out; - add.left = c_out; - add.right = a_out; - in_write_en = 1'd1; - in_write_data = add.out; - process_add[done] = in_done ? 1'd1; + group upd0<"static"=1> { + c_read0_0.write_en = 1'd1; + c0_addr0 = i0.out; + c_read0_0.in = 1'd1 ? 
c0_read_data; + upd0[done] = c_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + a_read0_0.write_en = 1'd1; + a0_addr0 = i0.out; + a_read0_0.in = 1'd1 ? a0_read_data; + upd1[done] = a_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + d0_addr0 = i0.out; + d0_write_en = 1'd1; + add0.left = c_read0_0.out; + add0.right = a_read0_0.out; + d0_write_data = 1'd1 ? add0.out; + upd2[done] = d0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const2.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; } } + control { seq { - process_add; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } } } } -component function0 (a_out: 32, b_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { - add_fn = add; - mult_fn = mult; - c = prim std_mem_d1(32, 1, 1); - d = prim std_mem_d1(32, 1, 1); - c0 = prim std_const(1, 0); + a_read0_0 = prim std_reg(32); + add0 = prim std_add(1); + b_read0_0 = prim std_reg(32); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); + mult_pipe0 = prim std_mult_pipe(32); } wires { - group run_mult_fn { - mult_fn.a_out = a_out; - mult_fn.b_out = b_out; - c.write_data = mult_fn.in_write_data; - c.write_en = mult_fn.in_write_en; - c.addr0 = mult_fn.in_addr0; - mult_fn.in_done = c.done; - mult_fn.go = 1'd1; - run_mult_fn[done] = mult_fn.done ? 
1'd1; - } - group run_add_fn { - add_fn.c_out = c.read_data; - add_fn.a_out = a_out; - d.write_data = add_fn.in_write_data; - d.write_en = add_fn.in_write_en; - d.addr0 = add_fn.in_addr0; - add_fn.in_done = d.done; - add_fn.go = 1'd1; - run_add_fn[done] = add_fn.done ? 1'd1; - } - group save_return_value { - d.addr0 = c0.out; - in_addr0 = c0.out; - in_write_en = 1'd1; - in_write_data = d.read_data; - save_return_value[done] = in_done ? 1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let1[done] = bin_read0_0.done; + mult_pipe0.left = a_read0_0.out; + mult_pipe0.right = b_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group upd0<"static"=1> { + a_read0_0.write_en = 1'd1; + a0_addr0 = i0.out; + a_read0_0.in = 1'd1 ? a0_read_data; + upd0[done] = a_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + b_read0_0.write_en = 1'd1; + b0_addr0 = i0.out; + b_read0_0.in = 1'd1 ? b0_read_data; + upd1[done] = b_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + c0_addr0 = i0.out; + c0_write_en = 1'd1; + c0_write_data = 1'd1 ? bin_read0_0.out; + upd2[done] = c0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; + upd3[done] = i0.done ? 
1'd1; } } + control { seq { - run_mult_fn; - run_add_fn; - save_return_value; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + let1; + upd2; + upd3; + } + } } } } component main () -> () { cells { - fn0 = function0; - c0 = prim std_const(1, 0); - main_ret = prim std_mem_d1(32, 1, 1); + d = prim std_mem_d1(32, 1, 1); + c = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); + tensor1d_add0 = tensor1d_add; b = prim std_mem_d1(32, 1, 1); + tensor1d_multiply0 = tensor1d_multiply; } wires { - group run_fn0 { - fn0.a_out = a.read_data; - a.addr0 = fn0.in_addr0; - fn0.b_out = b.read_data; - b.addr0 = fn0.in_addr0; - main_ret.addr0 = fn0.in_addr0; - main_ret.write_data = fn0.in_write_data; - main_ret.write_en = fn0.in_write_en; - fn0.in_done = main_ret.done; - fn0.go = 1'd1; - run_fn0[done] = fn0.done ? 1'd1; + group run_tensor1d_multiply { + a.addr0 = tensor1d_multiply0.a0_addr0; + tensor1d_multiply0.a0_read_data = a.read_data; + b.addr0 = tensor1d_multiply0.b0_addr0; + tensor1d_multiply0.b0_read_data = b.read_data; + c.addr0 = tensor1d_multiply0.c0_addr0; + c.write_data = tensor1d_multiply0.c0_write_data; + c.write_en = tensor1d_multiply0.c0_write_en; + tensor1d_multiply0.c0_done = c.done; + tensor1d_multiply0.go = 1'd1; + run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 1'd1; + } + group run_tensor1d_add { + c.addr0 = tensor1d_add0.c0_addr0; + tensor1d_add0.c0_read_data = c.read_data; + a.addr0 = tensor1d_add0.a0_addr0; + tensor1d_add0.a0_read_data = a.read_data; + d.addr0 = tensor1d_add0.d0_addr0; + d.write_data = tensor1d_add0.d0_write_data; + d.write_en = tensor1d_add0.d0_write_en; + tensor1d_add0.d0_done = d.done; + tensor1d_add0.go = 1'd1; + run_tensor1d_add[done] = tensor1d_add0.done ? 
1'd1; } } control { seq { - run_fn0; + run_tensor1d_multiply; + run_tensor1d_add; } } } + + diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index 819b8da5da..dbaad55558 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -1,152 +1,274 @@ import "primitives/std.lib"; -component sub (a_out: 32, b_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { cells { - sub = prim std_sub(32); - c0 = prim std_const(1, 0); + add0 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + c_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + d_read0_0 = prim std_reg(32); + i0 = prim std_reg(1); + le0 = prim std_le(1); + mult_pipe0 = prim std_mult_pipe(32); } wires { - group process_sub { - in_addr0 = c0.out; - sub.left = a_out; - sub.right = b_out; - in_write_en = 1'd1; - in_write_data = sub.out; - process_sub[done] = in_done ? 1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; } - } - control { - seq { - process_sub; + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; } - } -} -component add (c_out: 32, a_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { - cells { - add = prim std_add(32); - c0 = prim std_const(1, 0); - } - wires { - group process_add { - in_addr0 = c0.out; - add.left = c_out; - add.right = a_out; - in_write_en = 1'd1; - in_write_data = add.out; - process_add[done] = in_done ? 
1'd1; + group let1<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let1[done] = bin_read0_0.done; + mult_pipe0.left = c_read0_0.out; + mult_pipe0.right = d_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group upd0<"static"=1> { + c_read0_0.write_en = 1'd1; + c0_addr0 = i0.out; + c_read0_0.in = 1'd1 ? c0_read_data; + upd0[done] = c_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + d_read0_0.write_en = 1'd1; + d0_addr0 = i0.out; + d_read0_0.in = 1'd1 ? d0_read_data; + upd1[done] = d_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + e0_addr0 = i0.out; + e0_write_en = 1'd1; + e0_write_data = 1'd1 ? bin_read0_0.out; + upd2[done] = e0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; + upd3[done] = i0.done ? 1'd1; } } + control { seq { - process_add; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + let1; + upd2; + upd3; + } + } } } } -component mult (c_out: 32, d_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { - mult = prim std_mult(32); - c0 = prim std_const(1, 0); + a_read0_0 = prim std_reg(32); + add0 = prim std_add(32); + add1 = prim std_add(1); + c_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); } wires { - group process_mult { - in_addr0 = c0.out; - mult.left = c_out; - mult.right = d_out; - in_write_en = 1'd1; - in_write_data = mult.out; - process_mult[done] = in_done ? 
1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group upd0<"static"=1> { + c_read0_0.write_en = 1'd1; + c0_addr0 = i0.out; + c_read0_0.in = 1'd1 ? c0_read_data; + upd0[done] = c_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + a_read0_0.write_en = 1'd1; + a0_addr0 = i0.out; + a_read0_0.in = 1'd1 ? a0_read_data; + upd1[done] = a_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + d0_addr0 = i0.out; + d0_write_en = 1'd1; + add0.left = c_read0_0.out; + add0.right = a_read0_0.out; + d0_write_data = 1'd1 ? add0.out; + upd2[done] = d0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const2.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; } } + control { seq { - process_mult; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } } } } -component function0 (a_out: 32, b_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_subtract(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { - mult_fn = mult; - add_fn = add; - sub_fn = sub; - c = prim std_mem_d1(32, 1, 1); - d = prim std_mem_d1(32, 1, 1); - e = prim std_mem_d1(32, 1, 1); - c0 = prim std_const(1, 0); + a_read0_0 = prim std_reg(32); + add0 = prim std_add(1); + b_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); + sub0 = prim std_sub(32); } wires { - group run_sub_fn { - sub_fn.a_out = a_out; - sub_fn.b_out = b_out; - c.write_data = 
sub_fn.in_write_data; - c.write_en = sub_fn.in_write_en; - c.addr0 = sub_fn.in_addr0; - sub_fn.in_done = c.done; - sub_fn.go = 1'd1; - run_sub_fn[done] = sub_fn.done ? 1'd1; - } - group run_add_fn { - add_fn.c_out = c.read_data; - add_fn.a_out = a_out; - d.write_data = add_fn.in_write_data; - d.write_en = add_fn.in_write_en; - d.addr0 = add_fn.in_addr0; - add_fn.in_done = d.done; - add_fn.go = 1'd1; - run_add_fn[done] = add_fn.done ? 1'd1; - } - group run_mult_fn { - mult_fn.c_out = c.read_data; - mult_fn.d_out = d.read_data; - e.write_data = mult_fn.in_write_data; - e.write_en = mult_fn.in_write_en; - e.addr0 = mult_fn.in_addr0; - mult_fn.in_done = e.done; - mult_fn.go = 1'd1; - run_mult_fn[done] = mult_fn.done ? 1'd1; - } - group save_return_value { - e.addr0 = c0.out; - in_addr0 = c0.out; - in_write_en = 1'd1; - in_write_data = e.read_data; - save_return_value[done] = in_done ? 1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group upd0<"static"=1> { + a_read0_0.write_en = 1'd1; + a0_addr0 = i0.out; + a_read0_0.in = 1'd1 ? a0_read_data; + upd0[done] = a_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + b_read0_0.write_en = 1'd1; + b0_addr0 = i0.out; + b_read0_0.in = 1'd1 ? b0_read_data; + upd1[done] = b_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + c0_addr0 = i0.out; + c0_write_en = 1'd1; + sub0.left = a_read0_0.out; + sub0.right = b_read0_0.out; + c0_write_data = 1'd1 ? sub0.out; + upd2[done] = c0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; + upd3[done] = i0.done ? 
1'd1; } } + control { seq { - run_sub_fn; - run_add_fn; - run_mult_fn; - save_return_value; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } } } } component main () -> () { cells { - fn0 = function0; - c0 = prim std_const(1, 0); - main_ret = prim std_mem_d1(32, 1, 1); + e = prim std_mem_d1(32, 1, 1); + c = prim std_mem_d1(32, 1, 1); + d = prim std_mem_d1(32, 1, 1); + tensor1d_multiply0 = tensor1d_multiply; a = prim std_mem_d1(32, 1, 1); + tensor1d_add0 = tensor1d_add; b = prim std_mem_d1(32, 1, 1); + tensor1d_subtract0 = tensor1d_subtract; } wires { - group run_fn0 { - fn0.a_out = a.read_data; - a.addr0 = fn0.in_addr0; - fn0.b_out = b.read_data; - b.addr0 = fn0.in_addr0; - main_ret.addr0 = fn0.in_addr0; - main_ret.write_data = fn0.in_write_data; - main_ret.write_en = fn0.in_write_en; - fn0.in_done = main_ret.done; - fn0.go = 1'd1; - run_fn0[done] = fn0.done ? 1'd1; + group run_tensor1d_subtract { + a.addr0 = tensor1d_subtract0.a0_addr0; + tensor1d_subtract0.a0_read_data = a.read_data; + b.addr0 = tensor1d_subtract0.b0_addr0; + tensor1d_subtract0.b0_read_data = b.read_data; + c.addr0 = tensor1d_subtract0.c0_addr0; + c.write_data = tensor1d_subtract0.c0_write_data; + c.write_en = tensor1d_subtract0.c0_write_en; + tensor1d_subtract0.c0_done = c.done; + tensor1d_subtract0.go = 1'd1; + run_tensor1d_subtract[done] = tensor1d_subtract0.done ? 1'd1; + } + group run_tensor1d_add { + c.addr0 = tensor1d_add0.c0_addr0; + tensor1d_add0.c0_read_data = c.read_data; + a.addr0 = tensor1d_add0.a0_addr0; + tensor1d_add0.a0_read_data = a.read_data; + d.addr0 = tensor1d_add0.d0_addr0; + d.write_data = tensor1d_add0.d0_write_data; + d.write_en = tensor1d_add0.d0_write_en; + tensor1d_add0.d0_done = d.done; + tensor1d_add0.go = 1'd1; + run_tensor1d_add[done] = tensor1d_add0.done ? 
1'd1; + } + group run_tensor1d_multiply { + c.addr0 = tensor1d_multiply0.c0_addr0; + tensor1d_multiply0.c0_read_data = c.read_data; + d.addr0 = tensor1d_multiply0.d0_addr0; + tensor1d_multiply0.d0_read_data = d.read_data; + e.addr0 = tensor1d_multiply0.e0_addr0; + e.write_data = tensor1d_multiply0.e0_write_data; + e.write_en = tensor1d_multiply0.e0_write_en; + tensor1d_multiply0.e0_done = e.done; + tensor1d_multiply0.go = 1'd1; + run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 1'd1; } } control { seq { - run_fn0; + run_tensor1d_subtract; + run_tensor1d_add; + run_tensor1d_multiply; } } } + + diff --git a/frontends/relay-futil/tests/sub.expect b/frontends/relay-futil/tests/sub.expect index d4fb4c7691..ef4f0d69cc 100644 --- a/frontends/relay-futil/tests/sub.expect +++ b/frontends/relay-futil/tests/sub.expect @@ -1,84 +1,100 @@ import "primitives/std.lib"; -component sub (x_out: 32, y_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { +component tensor1d_subtract(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { - sub = prim std_sub(32); - c0 = prim std_const(1, 0); + add0 = prim std_add(1); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); + sub0 = prim std_sub(32); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); } wires { - group process_sub { - in_addr0 = c0.out; - sub.left = x_out; - sub.right = y_out; - in_write_en = 1'd1; - in_write_data = sub.out; - process_sub[done] = in_done ? 
1'd1; + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; } - } - control { - seq { - process_sub; + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; } - } -} -component function0 (x_out: 32, y_out: 32, in_done: 1) -> (in_write_data: 32, in_write_en: 1, in_addr0: 1) { - cells { - sub_fn = sub; - z = prim std_mem_d1(32, 1, 1); - c0 = prim std_const(1, 0); - } - wires { - group run_sub_fn { - sub_fn.x_out = x_out; - sub_fn.y_out = y_out; - z.write_data = sub_fn.in_write_data; - z.write_en = sub_fn.in_write_en; - z.addr0 = sub_fn.in_addr0; - sub_fn.in_done = z.done; - sub_fn.go = 1'd1; - run_sub_fn[done] = sub_fn.done ? 1'd1; + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_read_data; + upd1[done] = y_read0_0.done ? 1'd1; } - group save_return_value { - z.addr0 = c0.out; - in_addr0 = c0.out; - in_write_en = 1'd1; - in_write_data = z.read_data; - save_return_value[done] = in_done ? 1'd1; + group upd2<"static"=1> { + z0_addr0 = i0.out; + z0_write_en = 1'd1; + sub0.left = x_read0_0.out; + sub0.right = y_read0_0.out; + z0_write_data = 1'd1 ? sub0.out; + upd2[done] = z0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; + upd3[done] = i0.done ? 
1'd1; } } + control { seq { - run_sub_fn; - save_return_value; + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } } } } component main () -> () { cells { - fn0 = function0; - c0 = prim std_const(1, 0); - main_ret = prim std_mem_d1(32, 1, 1); + z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); + tensor1d_subtract0 = tensor1d_subtract; } wires { - group run_fn0 { - fn0.x_out = x.read_data; - x.addr0 = fn0.in_addr0; - fn0.y_out = y.read_data; - y.addr0 = fn0.in_addr0; - main_ret.addr0 = fn0.in_addr0; - main_ret.write_data = fn0.in_write_data; - main_ret.write_en = fn0.in_write_en; - fn0.in_done = main_ret.done; - fn0.go = 1'd1; - run_fn0[done] = fn0.done ? 1'd1; + group run_tensor1d_subtract { + x.addr0 = tensor1d_subtract0.x0_addr0; + tensor1d_subtract0.x0_read_data = x.read_data; + y.addr0 = tensor1d_subtract0.y0_addr0; + tensor1d_subtract0.y0_read_data = y.read_data; + z.addr0 = tensor1d_subtract0.z0_addr0; + z.write_data = tensor1d_subtract0.z0_write_data; + z.write_en = tensor1d_subtract0.z0_write_en; + tensor1d_subtract0.z0_done = z.done; + tensor1d_subtract0.go = 1'd1; + run_tensor1d_subtract[done] = tensor1d_subtract0.done ? 
1'd1; } } control { seq { - run_fn0; + run_tensor1d_subtract; } } } + + diff --git a/frontends/relay-futil/tests/tensor2d_add.expect b/frontends/relay-futil/tests/tensor2d_add.expect new file mode 100644 index 0000000000..052d44971c --- /dev/null +++ b/frontends/relay-futil/tests/tensor2d_add.expect @@ -0,0 +1,135 @@ +import "primitives/std.lib"; + +component tensor2d_add(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(3); + add2 = prim std_add(2); + const0 = prim std_const(2, 0); + const1 = prim std_const(2, 1); + const2 = prim std_const(3, 0); + const3 = prim std_const(3, 3); + const4 = prim std_const(3, 1); + const5 = prim std_const(2, 1); + i0 = prim std_reg(2); + j0 = prim std_reg(3); + le0 = prim std_le(2); + le1 = prim std_le(3); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = j0.out; + x0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_0_addr1 = j0.out; + y0_0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_0_read_data; + upd1[done] = y_read0_0.done ? 
1'd1; + } + group upd2<"static"=1> { + x10_0_addr1 = j0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + add0.left = x_read0_0.out; + add0.right = y_read0_0.out; + x10_0_write_data = 1'd1 ? add0.out; + upd2[done] = x10_0_done ? 1'd1; + } + group upd3<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd3[done] = j0.done ? 1'd1; + } + group upd4<"static"=1> { + i0.write_en = 1'd1; + add2.left = i0.out; + add2.right = const5.out; + i0.in = 1'd1 ? add2.out; + upd4[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 2, 4, 2, 3); + x = prim std_mem_d2(32, 2, 4, 2, 3); + y = prim std_mem_d2(32, 2, 4, 2, 3); + tensor2d_add0 = tensor2d_add; + } + wires { + group run_tensor2d_add { + x.addr0 = tensor2d_add0.x0_0_addr0; + tensor2d_add0.x0_0_read_data = x.read_data; + x.addr1 = tensor2d_add0.x0_0_addr1; + y.addr0 = tensor2d_add0.y0_0_addr0; + tensor2d_add0.y0_0_read_data = y.read_data; + y.addr1 = tensor2d_add0.y0_0_addr1; + x1.addr0 = tensor2d_add0.x10_0_addr0; + x1.addr1 = tensor2d_add0.x10_0_addr1; + x1.write_data = tensor2d_add0.x10_0_write_data; + x1.write_en = tensor2d_add0.x10_0_write_en; + tensor2d_add0.x10_0_done = x1.done; + tensor2d_add0.go = 1'd1; + run_tensor2d_add[done] = tensor2d_add0.done ? 
1'd1; + } + } + control { + seq { + run_tensor2d_add; + } + } +} + + diff --git a/frontends/relay-futil/tests/tensor2d_add.relay b/frontends/relay-futil/tests/tensor2d_add.relay new file mode 100644 index 0000000000..9aba018f7c --- /dev/null +++ b/frontends/relay-futil/tests/tensor2d_add.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(2, 4), int32], %y: Tensor[(2, 4), int32]) { + let %x1 = add(%x, %y); + %x1 +} + diff --git a/frontends/relay-futil/tests/tensor3d_batch_flatten.expect b/frontends/relay-futil/tests/tensor3d_batch_flatten.expect new file mode 100644 index 0000000000..d647914125 --- /dev/null +++ b/frontends/relay-futil/tests/tensor3d_batch_flatten.expect @@ -0,0 +1,166 @@ +import "primitives/std.lib"; + +component tensor3d_batch_flatten(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { + cells { + add0 = prim std_add(3); + add1 = prim std_add(2); + add2 = prim std_add(2); + add3 = prim std_add(1); + const0 = prim std_const(3, 0); + const1 = prim std_const(1, 0); + const10 = prim std_const(1, 1); + const2 = prim std_const(1, 0); + const3 = prim std_const(2, 0); + const4 = prim std_const(2, 1); + const5 = prim std_const(2, 0); + const6 = prim std_const(2, 1); + const7 = prim std_const(3, 1); + const8 = prim std_const(2, 1); + const9 = prim std_const(2, 1); + i0 = prim std_reg(1); + j0 = prim std_reg(2); + k0 = prim std_reg(2); + l_0 = prim std_reg(3); + le0 = prim std_le(1); + le1 = prim std_le(2); + le2 = prim std_le(2); + x_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const2.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const4.out; + } + group cond2<"static"=0> { + cond2[done] = 
1'd1; + le2.left = k0.out; + le2.right = const6.out; + } + group let0<"static"=1> { + l_0.in = const0.out; + l_0.write_en = 1'd1; + let0[done] = l_0.done; + } + group let1<"static"=1> { + i0.in = const1.out; + i0.write_en = 1'd1; + let1[done] = i0.done; + } + group let2<"static"=1> { + j0.in = const3.out; + j0.write_en = 1'd1; + let2[done] = j0.done; + } + group let3<"static"=1> { + k0.in = const5.out; + k0.write_en = 1'd1; + let3[done] = k0.done; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_0_addr2 = k0.out; + x0_0_0_addr1 = j0.out; + x0_0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + x10_0_addr1 = l_0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? x_read0_0.out; + upd1[done] = x10_0_done ? 1'd1; + } + group upd2<"static"=1> { + l_0.write_en = 1'd1; + add0.left = l_0.out; + add0.right = const7.out; + l_0.in = 1'd1 ? add0.out; + upd2[done] = l_0.done ? 1'd1; + } + group upd3<"static"=1> { + k0.write_en = 1'd1; + add1.left = k0.out; + add1.right = const8.out; + k0.in = 1'd1 ? add1.out; + upd3[done] = k0.done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add2.left = j0.out; + add2.right = const9.out; + j0.in = 1'd1 ? add2.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add3.left = i0.out; + add3.right = const10.out; + i0.in = 1'd1 ? add3.out; + upd5[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + let1; + while le0.out with cond0 { + seq { + let2; + while le1.out with cond1 { + seq { + let3; + while le2.out with cond2 { + seq { + upd0; + upd1; + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 1, 4, 1, 3); + x = prim std_mem_d3(32, 1, 2, 2, 1, 2, 2); + tensor3d_batch_flatten0 = tensor3d_batch_flatten; + } + wires { + group run_tensor3d_batch_flatten { + x.addr0 = tensor3d_batch_flatten0.x0_0_0_addr0; + tensor3d_batch_flatten0.x0_0_0_read_data = x.read_data; + x.addr1 = tensor3d_batch_flatten0.x0_0_0_addr1; + x.addr2 = tensor3d_batch_flatten0.x0_0_0_addr2; + x1.addr0 = tensor3d_batch_flatten0.x10_0_addr0; + x1.addr1 = tensor3d_batch_flatten0.x10_0_addr1; + x1.write_data = tensor3d_batch_flatten0.x10_0_write_data; + x1.write_en = tensor3d_batch_flatten0.x10_0_write_en; + tensor3d_batch_flatten0.x10_0_done = x1.done; + tensor3d_batch_flatten0.go = 1'd1; + run_tensor3d_batch_flatten[done] = tensor3d_batch_flatten0.done ? 1'd1; + } + } + control { + seq { + run_tensor3d_batch_flatten; + } + } +} + + diff --git a/frontends/relay-futil/tests/tensor3d_batch_flatten.relay b/frontends/relay-futil/tests/tensor3d_batch_flatten.relay new file mode 100644 index 0000000000..2a5e223fec --- /dev/null +++ b/frontends/relay-futil/tests/tensor3d_batch_flatten.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(1, 2, 2), int32]) -> Tensor[(1, 4), int32] { + let %x1: Tensor[(1, 4), int32] = nn.batch_flatten(%x); + %x1 +} + diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 51f63c392b..8d34c080e5 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -1,5 +1,6 @@ from futil_ast import * from itertools import chain +import math def flatten(l): @@ -22,218 +23,70 @@ def get_bitwidth(type): ''' t = str(type) assert t[0:3] == 'int' or t[0:5] == 'float', f'{t} is not supported.' 
- begin = 3 if t[0:3] == 'int' else 5 # 'float' + begin = 3 if t[0:3] == 'int' else 5 # 'float' return int(t[begin:len(t)]) -def extract_function_arguments(args): +def get_memory_parameters(type): ''' - Extracts the arguments from a function as port definitions. + Acquires the memory parameters necessary to create a FuTIL memory primitive. ''' - inputs = [] - outputs = [] - for arg in args: - name = arg.name_hint - bitwidth = get_bitwidth(arg.type_annotation) - out_port = f'{name}_out' - inputs.append(FPortDef(name=out_port, bitwidth=bitwidth)) - inputs.append(FPortDef(name="in_done", bitwidth = 1)) - - write_data_port = f'in_write_data' - write_enable_port = f'in_write_en' - addr0_port = f'in_addr0' - - outputs.append(FPortDef(name=write_data_port, bitwidth=bitwidth)) - # TODO(cgyurgyik): Let's instead add a begin and end index. If begin == end, we can assume its 0D. - outputs.append(FPortDef(name=write_enable_port, bitwidth=1)) - outputs.append(FPortDef(name=addr0_port, bitwidth=1)) # FIXME: Hardcoded for 0D tensors. - return inputs, outputs - - -def build_main_body(c: FComponent): - ''' - Builds the main function that will take the last function and run it. - ''' - for cell in reversed(c.cells): - if cell.is_declaration(): - bitwidth = cell.declaration.component.signature.outputs[0].bitwidth - inputs = cell.declaration.component.signature.inputs - outputs = cell.declaration.component.signature.outputs - function_name = cell.declaration.name - break - - index = 0 - cst = FCell(primitive=FPrimitive(name=f'c{index}', data=[1, index], type=PrimitiveType.Constant)) - c.add_cell(cst) - ret = FCell(primitive=FPrimitive(name=f'{c.name}_ret', data=[32, 1, 1], type=PrimitiveType.Memory1D)) - c.add_cell(ret) + t = str(type) + if t[0:3] == 'int' or t[0:5] == 'float': + return [get_bitwidth(type), 1, 1], PrimitiveType.Memory1D + assert t[0:6] == 'Tensor', f'{type} is not currently supported.' 
- input_arguments = [] - for i in range(0, len(inputs) - 1): - input_name = (inputs[i].name).split('_')[0] - input_arguments.append(input_name) - c.add_cell(FCell(primitive=FPrimitive(name=input_name, data=[bitwidth, 1, 1], type=PrimitiveType.Memory1D))) + string_type = t[t.find(")") + 3:t.find("]")] + string_dimensions = t[t.find("(") + 1:t.find(")")] - group_name = f'run_{function_name}' - write_data_port = outputs[0].name - write_enable_port = outputs[1].name - addr0_port = outputs[2].name + tensor_dimensions = list(map(int, string_dimensions.split(','))) + data = [get_bitwidth(string_type)] + for dimension in tensor_dimensions: data.append(dimension) # Size. + for dimension in tensor_dimensions: data.append(int(math.log2(dimension) + 1)) # Index size. - wires = [] - for i in range(0, len(input_arguments)): - # Build connections for input arguments. - wires.append(FWire(f'{function_name}.{inputs[i].name}', f'{input_arguments[i]}.read_data')) - wires.append(FWire(f'{input_arguments[i]}.addr0', f'{function_name}.{addr0_port}')) + if len(tensor_dimensions) == 2: + type = PrimitiveType.Memory2D + elif len(tensor_dimensions) == 3: + type = PrimitiveType.Memory3D + return data, type - wires.append(FWire(f'{c.name}_ret.addr0', f'{function_name}.{addr0_port}')) - wires.append(FWire(f'{c.name}_ret.write_data', f'{function_name}.{write_data_port}')) - wires.append(FWire(f'{c.name}_ret.write_en', f'{function_name}.{write_enable_port}')) - wires.append(FWire(f'{function_name}.in_done', f'{ret.primitive.name}.done')) - wires.append(FWire(f'{function_name}.go', "1'd1")) - wires.append(FWire(f'{group_name}[done]', f'{function_name}.done ? ' + "1'd1")) - c.wires = [FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))] - connections = list(filter(lambda w: w.is_group(), c.wires)) - c.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] - - -def build_function_body(c: FComponent): - ''' - Builds the body of the relay function. 
This is done by building function declarations, - and connecting them with wires. - ''' - declarations = [] +def build_main(c: FComponent): + dahlia_declarations = [] for cell in reversed(c.cells): - if cell.is_declaration(): - declarations.append(cell.declaration) - - for declaration in declarations: - intermediary_output = declaration.intermediary_output - c.add_cell(declaration.intermediary_output) - bitwidth = declaration.component.signature.outputs[0].bitwidth - inputs = declaration.component.signature.inputs - outputs = declaration.component.signature.outputs - function_name = declaration.name - group_name = f'run_{function_name}' - write_data_port = outputs[0].name - write_enable_port = outputs[1].name - addr0_port = outputs[2].name - - wires = get_input_wires(c, declaration) - wires.append(FWire(f'{intermediary_output.primitive.name}.write_data', f'{function_name}.{write_data_port}')) - wires.append(FWire(f'{intermediary_output.primitive.name}.write_en', f'{function_name}.{write_enable_port}')) - wires.append(FWire(f'{intermediary_output.primitive.name}.addr0', f'{function_name}.{addr0_port}')) - wires.append(FWire(f'{function_name}.{inputs[-1].name}', f'{intermediary_output.primitive.name}.done')) - wires.append(FWire(f'{function_name}.go', "1'd1")) - wires.append(FWire(f'{group_name}[done]', f'{function_name}.done ? 
' + "1'd1")) + if cell.is_dahlia_declaration(): + dahlia_declarations.append(cell.dahlia_declaration) + + for declaration in dahlia_declarations: + inputs = declaration.inputs + wires = [] + group_name = f'run_{declaration.component_name}' + for input in flatten(inputs): + prim = input.primitive + wires.append(FWire(f'{prim.name}.addr0', f'{declaration.decl_name}.{input.dahlia_name}_addr0')) + wires.append( + FWire(f'{declaration.decl_name}.{input.dahlia_name}_read_data', f'{prim.name}.read_data')) + if not prim.type == PrimitiveType.Memory2D and not prim.type == PrimitiveType.Memory3D: continue + wires.append(FWire(f'{prim.name}.addr1', f'{declaration.decl_name}.{input.dahlia_name}_addr1')) + if not prim.type == PrimitiveType.Memory3D: continue + wires.append(FWire(f'{prim.name}.addr2', f'{declaration.decl_name}.{input.dahlia_name}_addr2')) + + output = declaration.output + wires.append(FWire(f'{output.primitive.name}.addr0', f'{declaration.decl_name}.{output.dahlia_name}_addr0')) + if output.primitive.type == PrimitiveType.Memory2D or output.primitive.type == PrimitiveType.Memory3D: + wires.append(FWire(f'{output.primitive.name}.addr1', f'{declaration.decl_name}.{output.dahlia_name}_addr1')) + if output.primitive.type == PrimitiveType.Memory3D: + wires.append(FWire(f'{output.primitive.name}.addr2', f'{declaration.decl_name}.{output.dahlia_name}_addr2')) + + wires.append( + FWire(f'{output.primitive.name}.write_data', f'{declaration.decl_name}.{output.dahlia_name}_write_data')) + wires.append( + FWire(f'{output.primitive.name}.write_en', f'{declaration.decl_name}.{output.dahlia_name}_write_en')) + wires.append(FWire(f'{declaration.decl_name}.{output.dahlia_name}_done', f'{output.primitive.name}.done')) + wires.append(FWire(f'{declaration.decl_name}.go', "1'd1")) + wires.append(FWire(f'{group_name}[done]', f"{declaration.decl_name}.done ? 
1'd1")) c.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) - last = declarations[len(declarations) - 1].intermediary_output - build_return_connections(c, last) - # Ensures that only group names make it into the Controls of a component. connections = list(filter(lambda w: w.is_group(), c.wires)) c.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] - return c - - -def get_input_wires(comp: FComponent, decl: FDeclaration): - ''' - Produces the appropriate input wires for a declaration 'decl' within component 'c'. - This is necessary to avoid re-creating cells for intermediary inputs that - already exist. For example, - - fn(%a, %b) { - let %c = multiply(%a, %b); // %a, %b already exist. - let %d = add(%a, %c); // %c is an intermediary. - } - ''' - function_name = decl.name - decl_inputs = decl.component.signature.inputs - intermediary_inputs = flatten(decl.intermediary_inputs) - - finalized_inputs = [] - # Determines whether an input is either an actual input of a previous function or an intermediary input. - # TODO(cgyurgyik): Clean this up once finalized, and use appropriate data structures. - for input in intermediary_inputs: - found = False - for cell in comp.cells: - if not cell.is_primitive() or cell.primitive.name != input.primitive.name: continue - found = True - finalized_inputs.append(f'{input.primitive.name}.read_data') - break - if not found: - finalized_inputs.append(f'{input.primitive.name}_out') - - wires = [] - for i in range(0, len(decl_inputs) - 1): - # Build connections for input arguments. - wires.append(FWire(f'{function_name}.{decl_inputs[i].name}', f'{finalized_inputs[i]}')) - return wires - - -def build_return_connections(comp: FComponent, intermediary_output: FCell): - ''' - Given a component `comp` and the final intermediary output `intermediary_output`, Creates a group to save the value in main. 
- Example: - Relay Function: - fn (%a, %b) { - let %c = add(%a, %b); - %c - } - This will create the group (and corresponding wires) to connect `c` to the return value in `main`. - ''' - inputs = comp.signature.inputs - outputs = comp.signature.outputs - intermediary_output_name = intermediary_output.primitive.name - - index = primitive = FPrimitive(name="c0", data=[1, 0], type=PrimitiveType.Constant) - comp.add_cell(FCell(primitive=index)) - - group_name = "save_return_value" - wires = [] - wires.append(FWire(f'{intermediary_output_name}.addr0', f'{index.name}.out')) - wires.append(FWire(f'in_addr0', f'{index.name}.out')) - wires.append(FWire(f'in_write_en', "1'd1")) - wires.append(FWire(f'in_write_data', f'{intermediary_output_name}.read_data')) - wires.append(FWire(f'{group_name}[done]', f'{inputs[-1].name} ? ' + "1'd1")) - comp.wires.append((FConnection(group=FGroup(name=group_name, wires=wires, attributes=[])))) - - -def build_tensor_0D_binary_op(call, args, op_name: str): - ''' - Builds the component for a 0D tensor (scalar) binary operation. 
- ''' - comp: FComponent = FComponent(name=op_name, cells=[], wires=[], - signature=FSignature(inputs=[], outputs=[])) - inputs, outputs = extract_function_arguments(call.args) - comp.signature.inputs = inputs - comp.signature.outputs = outputs - - op = op_name - assert inputs[0].bitwidth == inputs[1].bitwidth, \ - f'Port definitions have different bitwidths for BinOp: {inputs[0].bitwidth}, {inputs[1].bitwidth}' - - cst = FCell(primitive=FPrimitive(name="c0", data=[inputs[-1].bitwidth, 0], type=PrimitiveType.Constant)) - adder = FCell(primitive=FPrimitive(name=op, data=[inputs[0].bitwidth, op_name], type=PrimitiveType.BinOp)) - comp.add_cell(adder) - comp.add_cell(cst) - - write_data_port = outputs[0].name - write_en_port = outputs[1].name - addr0_port = outputs[2].name - - group_name = f'process_{op_name}' - wires = [] - wires.append(FWire(addr0_port, f'{cst.primitive.name}.out')) - wires.append(FWire(f'{op}.left', inputs[0].name)) - wires.append(FWire(f'{op}.right', inputs[1].name)) - wires.append(FWire(write_en_port, "1'd1")) - wires.append(FWire(write_data_port, f'{op}.out')) - - wires.append(FWire(f'{group_name}[done]', f'{inputs[-1].name} ? ' + "1'd1")) - - connections = [FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))] - comp.wires = connections - comp.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] - return FCell(declaration=FDeclaration(name=op_name + "_fn", component=comp, intermediary_inputs=args)) From 981920213e35dd5ad00306f5f52a173560b54738 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 12:01:55 -0500 Subject: [PATCH 02/75] cleanup. 
--- frontends/relay-futil/compiler.py | 2 +- frontends/relay-futil/utilities.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index b10932c239..033506cc1d 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -156,7 +156,7 @@ def compile(program) -> str: MAIN = visitor.visit(program) DAHLIA_COMPONENTS = '\n'.join(visitor.dahlia_components) NEWL = "\n\n" - return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}{NEWL}' + return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}' if __name__ == '__main__': diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 8d34c080e5..89adfec513 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -54,8 +54,8 @@ def get_memory_parameters(type): def build_main(c: FComponent): dahlia_declarations = [] for cell in reversed(c.cells): - if cell.is_dahlia_declaration(): - dahlia_declarations.append(cell.dahlia_declaration) + if not cell.is_dahlia_declaration(): continue + dahlia_declarations.append(cell.dahlia_declaration) for declaration in dahlia_declarations: inputs = declaration.inputs From 9e83fa17d09f12843c9b891e671492eadfe5047b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 14:50:34 -0500 Subject: [PATCH 03/75] Add temporary file use. 
--- frontends/relay-futil/dahlia_functions.py | 32 +++++++++---------- frontends/relay-futil/tests/add.expect | 2 -- frontends/relay-futil/tests/let1.expect | 2 -- frontends/relay-futil/tests/let2.expect | 2 -- frontends/relay-futil/tests/let3.expect | 2 -- frontends/relay-futil/tests/sub.expect | 2 -- .../relay-futil/tests/tensor2d_add.expect | 2 -- .../tests/tensor3d_batch_flatten.expect | 2 -- 8 files changed, 16 insertions(+), 30 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 36873c3106..5989d9af6c 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -1,5 +1,6 @@ import subprocess +from tempfile import NamedTemporaryFile, TemporaryFile from futil_ast import * @@ -23,24 +24,23 @@ def lower_dahlia_program(prog, component_name): I've explicitly removed errors with `2>/dev/null` so they aren't inserted to the file as well. However, this makes debugging difficult as well. ''' - program_string = "\"" + program_string = "" for line in prog.splitlines(): program_string += f'{line}\n' - program_string += "\"" - no_err = "2>/dev/null" - command = \ - f""" - echo {program_string} > temp.fuse && - /Users/cgyurgyik/Projects/dahlia/fuse temp.fuse --lower -b=futil -n={component_name} > lowered.futil {no_err} -l error && - cd ../../ && - cargo run -- frontends/relay-futil/lowered.futil -p externalize > frontends/relay-futil/temp.futil {no_err} && - cd frontends/relay-futil/ - """ - subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() - dahlia_component = open('temp.futil', 'r').read()[29:] # Skip over importing the primitives library. 
- subprocess.Popen("rm temp.fuse ; rm lowered.futil ; rm temp.futil", stdout=subprocess.PIPE, - shell=True).communicate() - return dahlia_component + + with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: + tf0.seek(0) + tf1.seek(0) + tf2.seek(0) + tf0.write(bytes(program_string, 'UTF-8')) + no_err = "2>/dev/null" + command = f""" + /Users/cgyurgyik/Projects/dahlia/fuse {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {no_err} + && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {no_err} + """ + subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() + dahlia_component = open(tf2.name, 'r').read()[29:] # Skip over importing the primitives library. + return dahlia_component def tensor1d_op(declaration): diff --git a/frontends/relay-futil/tests/add.expect b/frontends/relay-futil/tests/add.expect index c02365332b..a67d257997 100644 --- a/frontends/relay-futil/tests/add.expect +++ b/frontends/relay-futil/tests/add.expect @@ -96,5 +96,3 @@ component main () -> () { } } } - - diff --git a/frontends/relay-futil/tests/let1.expect b/frontends/relay-futil/tests/let1.expect index 4e82ded37e..77312716ef 100644 --- a/frontends/relay-futil/tests/let1.expect +++ b/frontends/relay-futil/tests/let1.expect @@ -104,5 +104,3 @@ component main () -> () { } } } - - diff --git a/frontends/relay-futil/tests/let2.expect b/frontends/relay-futil/tests/let2.expect index 903681d9a8..88da5412ff 100644 --- a/frontends/relay-futil/tests/let2.expect +++ b/frontends/relay-futil/tests/let2.expect @@ -187,5 +187,3 @@ component main () -> () { } } } - - diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index dbaad55558..8885ca1785 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -270,5 +270,3 @@ component main () -> () { } } } - - diff --git a/frontends/relay-futil/tests/sub.expect 
b/frontends/relay-futil/tests/sub.expect index ef4f0d69cc..9cac092744 100644 --- a/frontends/relay-futil/tests/sub.expect +++ b/frontends/relay-futil/tests/sub.expect @@ -96,5 +96,3 @@ component main () -> () { } } } - - diff --git a/frontends/relay-futil/tests/tensor2d_add.expect b/frontends/relay-futil/tests/tensor2d_add.expect index 052d44971c..46db3a2cab 100644 --- a/frontends/relay-futil/tests/tensor2d_add.expect +++ b/frontends/relay-futil/tests/tensor2d_add.expect @@ -131,5 +131,3 @@ component main () -> () { } } } - - diff --git a/frontends/relay-futil/tests/tensor3d_batch_flatten.expect b/frontends/relay-futil/tests/tensor3d_batch_flatten.expect index d647914125..b04fd2d7b3 100644 --- a/frontends/relay-futil/tests/tensor3d_batch_flatten.expect +++ b/frontends/relay-futil/tests/tensor3d_batch_flatten.expect @@ -162,5 +162,3 @@ component main () -> () { } } } - - From 1afa5a179faa6ae372c23d475f8024b721eb6b8c Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 16:34:50 -0500 Subject: [PATCH 04/75] Now only need changes in two places when adding a Dahlia function. --- frontends/relay-futil/compiler.py | 42 ++++++++++------------- frontends/relay-futil/dahlia_functions.py | 14 ++++---- frontends/relay-futil/futil_ast.py | 14 ++++---- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 033506cc1d..23d51b25d8 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -35,6 +35,7 @@ def relay_id(self, name): Relay does not explicitly differentiate a variable name if it is used twice. For example, %x = foo(%y); %x1 = bar(%x); // Here, at this level, the name_hint associated with `x1` is still 'x'. + To avoid this, we provide Relay with its own identification dictionary. If 'x' is seen three times, it will produce: 'x', 'x1', x2'. 
""" @@ -56,24 +57,28 @@ def produce_dahlia_name(self, name, type): if type == PrimitiveType.Memory3D: return dahlia_name + "_0_0" assert False, f'{name} with {type} is not supported yet.' - def get_dahlia_function_type(self, function_name, input_type): + def get_dahlia_declaration(self, function_name, cells, args): """ Returns the corresponding name, Dahlia function type, and op (if it is a binary op, otherwise None). If the function type isn't supported, fails with an assertion. """ - op = None + input_type = cells[0].primitive.type + function = name = op = None if function_name in BuiltInBinaryCalls: op = BuiltInBinaryCalls[function_name] if input_type == PrimitiveType.Memory1D: - return self.relay_id(f'tensor1d_{function_name}'), DahliaFunctionType.Tensor1DBinaryOp, op + name = self.relay_id(f'tensor1d_{function_name}') + function = tensor1d_op if input_type == PrimitiveType.Memory2D: - return self.relay_id(f'tensor2d_{function_name}'), DahliaFunctionType.Tensor2DBinaryOp, op - + name = self.relay_id(f'tensor2d_{function_name}') + function = tensor2d_op if function_name == "nn.batch_flatten": assert input_type == PrimitiveType.Memory3D, f'{input_type} not supported for batch flattening.' - return self.relay_id(f'tensor3d_batch_flatten'), DahliaFunctionType.Tensor3DBatchFlatten, op + function = tensor3d_batch_flatten + name = self.relay_id(f'{function.__name__}') - assert False, f'{function_name} with {input_type} is not supported.' + assert function != None and name != None, f'{function_name} with type {input_type} is not supported.' + return DahliaDeclaration(component_name=name, decl_name=self.id(name), op=op, inputs=args, function=function) def visit_var(self, var): name = self.relay_id(var.name_hint) @@ -91,16 +96,8 @@ def visit_let(self, let): output = variable[0] for value in flatten(values): if not value.is_dahlia_declaration(): continue - decl = value.dahlia_declaration - decl.output = output - # TODO(cgyurgyik): This shouldn't be necessary. 
To simplify, produce mapping - # between enum and corresponding function. - if decl.type == DahliaFunctionType.Tensor1DBinaryOp: - decl.program = tensor1d_op(decl) - elif decl.type == DahliaFunctionType.Tensor2DBinaryOp: - decl.program = tensor2d_op(decl) - elif decl.type == DahliaFunctionType.Tensor3DBatchFlatten: - decl.program = tensor3d_batch_flatten(decl) + value.dahlia_declaration.output = output + value.dahlia_declaration.invoke() return [body, values] def visit_constant(self, const): @@ -114,14 +111,11 @@ def visit_call(self, call): cells = [] args = [] for arg in call.args: - result = self.visit(arg) - cells.append(result) - args.append(result) + argument = self.visit(arg) + cells.append(argument) + args.append(argument) cells = flatten(cells) - name, type, op = self.get_dahlia_function_type(call.op.name, cells[0].primitive.type) - dahlia_declaration = DahliaDeclaration(component_name=name, decl_name=self.id(name), op=op, - inputs=flatten(args), type=type) - cells.append(FCell(dahlia_declaration=dahlia_declaration)) + cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, flatten(args)))) return cells def visit_function(self, function): diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 5989d9af6c..4d401bfe3e 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -3,6 +3,9 @@ from tempfile import NamedTemporaryFile, TemporaryFile from futil_ast import * +IMPORT_STATEMENT = """import "primitives/std.lib";\n""" +NO_ERR = "2>/dev/null" + def lower_dahlia_program(prog, component_name): ''' @@ -29,18 +32,17 @@ def lower_dahlia_program(prog, component_name): program_string += f'{line}\n' with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: + tf0.write(bytes(program_string, 'UTF-8')) tf0.seek(0) tf1.seek(0) tf2.seek(0) - tf0.write(bytes(program_string, 'UTF-8')) - no_err = "2>/dev/null" 
command = f""" - /Users/cgyurgyik/Projects/dahlia/fuse {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {no_err} - && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {no_err} + /Users/cgyurgyik/Projects/dahlia/fuse {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} \ + {NO_ERR} && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {NO_ERR} """ subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() - dahlia_component = open(tf2.name, 'r').read()[29:] # Skip over importing the primitives library. - return dahlia_component + component = open(tf2.name, 'r').read()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. + return component def tensor1d_op(declaration): diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index dbdd21e666..61afa9d236 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -1,14 +1,9 @@ from dataclasses import dataclass from typing import List, Dict +from types import FunctionType from enum import Enum -class DahliaFunctionType(Enum): - Tensor1DBinaryOp = 1 - Tensor2DBinaryOp = 2 - Tensor3DBatchFlatten = 3 - - class PrimitiveType(Enum): Register = 1 Constant = 2 @@ -142,11 +137,14 @@ def add_cell(self, subcomponent: Cell): class DahliaDeclaration: decl_name: str component_name: str - type: DahliaFunctionType op: str = None - program: str = None inputs: List[Cell] = None output: Cell = None + function: FunctionType = None + program: str = None + + def invoke(self): + self.program = self.function(self) @dataclass From 18a8361fc4c2530e2c0cba8ef7c24a973976df11 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 18:29:09 -0500 Subject: [PATCH 05/75] Assume Dahlia is on path. 
--- frontends/relay-futil/README.md | 8 ++++---- frontends/relay-futil/compiler.py | 11 ++++++----- frontends/relay-futil/dahlia_functions.py | 8 ++++---- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/frontends/relay-futil/README.md b/frontends/relay-futil/README.md index 0136ec5436..11d60d8a82 100644 --- a/frontends/relay-futil/README.md +++ b/frontends/relay-futil/README.md @@ -7,9 +7,7 @@ This is an in-progress compiler from [TVM][]'s intermediate representation, [Rel Installation ------------ -You will need to install TVM—and we depend on the latest source (unreleased changes for 0.7). There are [official instructions][tvm-install], but these might work for you: - -1. Clone the TVM repository (success was once attained with revision `ccacb1ec1`): +1. Clone the TVM repository with commit hash `ccacb1ec1`): git clone --recursive git@github.com:apache/incubator-tvm.git cd incubator-tvm && git reset --hard ccacb1ec1 @@ -19,7 +17,7 @@ You will need to install TVM—and we depend on the latest source (unreleased ch mkdir build && cd build cp ../cmake/config.cmake . -4. Build (takes about 9 minutes on my MacBook Pro): +4. Build TVM: cmake -G Ninja .. && ninja @@ -35,6 +33,8 @@ You will need to install TVM—and we depend on the latest source (unreleased ch python3 setup.py bdist_wheel pip3 install --user dist/topi-*.whl +7. Install [Dahlia](https://github.com/cucapra/dahlia#set-it-up), which is used when lowering from Relay to FuTIL. +The `fuse` executable is expected to be on your path. 
Run an Example -------------- diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 23d51b25d8..e125a14d7c 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -64,18 +64,19 @@ def get_dahlia_declaration(self, function_name, cells, args): """ input_type = cells[0].primitive.type function = name = op = None + if function_name in BuiltInBinaryCalls: op = BuiltInBinaryCalls[function_name] if input_type == PrimitiveType.Memory1D: - name = self.relay_id(f'tensor1d_{function_name}') function = tensor1d_op + name = self.relay_id(f'tensor1d_{function_name}') if input_type == PrimitiveType.Memory2D: - name = self.relay_id(f'tensor2d_{function_name}') function = tensor2d_op + name = self.relay_id(f'tensor2d_{function_name}') if function_name == "nn.batch_flatten": - assert input_type == PrimitiveType.Memory3D, f'{input_type} not supported for batch flattening.' - function = tensor3d_batch_flatten - name = self.relay_id(f'{function.__name__}') + if input_type == PrimitiveType.Memory3D: + function = tensor3d_batch_flatten + name = self.relay_id(f'{function.__name__}') assert function != None and name != None, f'{function_name} with type {input_type} is not supported.' return DahliaDeclaration(component_name=name, decl_name=self.id(name), op=op, inputs=args, function=function) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 4d401bfe3e..f4e853a05b 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -28,16 +28,16 @@ def lower_dahlia_program(prog, component_name): to the file as well. However, this makes debugging difficult as well. 
''' program_string = "" - for line in prog.splitlines(): - program_string += f'{line}\n' + for line in prog.splitlines(): program_string += f'{line}\n' with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: tf0.write(bytes(program_string, 'UTF-8')) tf0.seek(0) tf1.seek(0) tf2.seek(0) - command = f""" - /Users/cgyurgyik/Projects/dahlia/fuse {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} \ + command = \ + f""" + fuse {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} \ {NO_ERR} && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {NO_ERR} """ subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() From 3ae22eed973fcde3ee181b926a14c4b1083bf22d Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 18:41:43 -0500 Subject: [PATCH 06/75] Add DAHLIA_EXEC environment variable. --- frontends/relay-futil/README.md | 3 ++- frontends/relay-futil/dahlia_functions.py | 28 +++++++---------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/frontends/relay-futil/README.md b/frontends/relay-futil/README.md index 11d60d8a82..38ed6ba368 100644 --- a/frontends/relay-futil/README.md +++ b/frontends/relay-futil/README.md @@ -34,7 +34,8 @@ Installation pip3 install --user dist/topi-*.whl 7. Install [Dahlia](https://github.com/cucapra/dahlia#set-it-up), which is used when lowering from Relay to FuTIL. -The `fuse` executable is expected to be on your path. +The `fuse` executable is expected to be on your path. Alternatively, it will check to see if the environment variable +`$DAHLIA_EXEC` is set. 
Run an Example -------------- diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index f4e853a05b..fe16e71d98 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -1,4 +1,5 @@ import subprocess +import os from tempfile import NamedTemporaryFile, TemporaryFile from futil_ast import * @@ -9,23 +10,9 @@ def lower_dahlia_program(prog, component_name): ''' - Takes in a string that represents a Dahlia program, lowers it to FuTIL, and applies the `externalize` pass. - This is just for experimental purposes, and needs to be replaced. - More bluntly, this does the following: - 1. Copies dahlia program `prog` to a temporary file `temp.fuse`. - $ echo `program_string` > temp.fuse - - 2. Lowers `temp.fuse` to FuTIL with the name changed to `component_name`, and saves it in `lowered.futil`. - $ ./fuse temp.fuse --lower -b=futil -n=component_name > lowered.futil - - 3. Runs the 'externalize' pass on the `lowered.futil` file. - $ cargo run -- lowered.futil -p externalize > temp.futil - - 4. Copies the output from `lowered.futil`, except for the first line (we don't want another copy of the import). - - TODO(cgyurgyik): As you'll see below, this only works on my local machine. - I've explicitly removed errors with `2>/dev/null` so they aren't inserted - to the file as well. However, this makes debugging difficult as well. + Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, + and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are + declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the component. 
''' program_string = "" for line in prog.splitlines(): program_string += f'{line}\n' @@ -35,13 +22,14 @@ def lower_dahlia_program(prog, component_name): tf0.seek(0) tf1.seek(0) tf2.seek(0) + fuse_binary = os.environ['DAHLIA_EXEC'] if 'DAHLIA_EXEC' in os.environ else 'fuse' command = \ f""" - fuse {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} \ - {NO_ERR} && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {NO_ERR} + {fuse_binary} {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {NO_ERR} \ + && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {NO_ERR} """ subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() - component = open(tf2.name, 'r').read()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. + component = tf2.read().decode()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. return component From dc96f853261953378b6270c85c334e471acd4b04 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 18:48:52 -0500 Subject: [PATCH 07/75] cleanup. --- frontends/relay-futil/compiler.py | 3 ++- frontends/relay-futil/futil_ast.py | 9 +++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index e125a14d7c..7aa7321589 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -25,6 +25,7 @@ def __init__(self): def id(self, name): """ Provides a unique identification for a given name. + For example, if 'a' is seen three times, it will produce: 'a0', 'a1', 'a2'. 
""" id_number = self.id_dictionary[name] self.id_dictionary[name] += 1 @@ -150,7 +151,7 @@ def compile(program) -> str: PREAMBLE = """import "primitives/std.lib";""" MAIN = visitor.visit(program) DAHLIA_COMPONENTS = '\n'.join(visitor.dahlia_components) - NEWL = "\n\n" + NEWL = '\n\n' return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}' diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 61afa9d236..cef66aef0a 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -165,11 +165,8 @@ class FCell(Cell): declaration: FDeclaration = None dahlia_declaration: DahliaDeclaration = None - def is_primitive(self): - return self.primitive != None + def is_primitive(self): return self.primitive != None - def is_declaration(self): - return self.declaration != None + def is_declaration(self): return self.declaration != None - def is_dahlia_declaration(self): - return self.dahlia_declaration != None + def is_dahlia_declaration(self): return self.dahlia_declaration != None From c8088eb2541613e1bec8b192880c46d082390e12 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 5 Nov 2020 20:28:03 -0500 Subject: [PATCH 08/75] Cleanup. --- frontends/relay-futil/README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/frontends/relay-futil/README.md b/frontends/relay-futil/README.md index 38ed6ba368..848f4abd3d 100644 --- a/frontends/relay-futil/README.md +++ b/frontends/relay-futil/README.md @@ -23,19 +23,16 @@ Installation 5. Install the `tvm` Python package by building a [wheel][]: - cd ../python - python3 setup.py bdist_wheel + cd ../python && python3 setup.py bdist_wheel pip3 install --user dist/tvm-*.whl 6. Install the accompanying `topi` Python package: - cd ../topi/python - python3 setup.py bdist_wheel + cd ../topi/python && python3 setup.py bdist_wheel pip3 install --user dist/topi-*.whl -7. 
Install [Dahlia](https://github.com/cucapra/dahlia#set-it-up), which is used when lowering from Relay to FuTIL. -The `fuse` executable is expected to be on your path. Alternatively, it will check to see if the environment variable -`$DAHLIA_EXEC` is set. +7. Install [Dahlia][], which is used when lowering from Relay to FuTIL. +The `fuse` executable is expected to be on your path. Alternatively, it will check to see if the environment variable `$DAHLIA_EXEC` is set. Run an Example -------------- @@ -63,7 +60,7 @@ The Relay text format parser requires ANTLR, so also do this: Then, just type `runt` to run the tests. - +[dahlia]: https://github.com/cucapra/dahlia#set-it-up [tvm]: https://tvm.apache.org [tvm-install]: https://tvm.apache.org/docs/install/from_source.html#developers-get-source-from-github [relay]: https://tvm.apache.org/docs/api/python/relay/index.html From 7d753704c748b21ebef4b1eec0df67157277e590 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 6 Nov 2020 18:16:44 -0500 Subject: [PATCH 09/75] Add incorrect batch_matmul. 
--- frontends/relay-futil/compiler.py | 3 + frontends/relay-futil/dahlia_functions.py | 85 +++---- frontends/relay-futil/example.py | 10 +- .../relay-futil/tests/batch_matmul.expect | 218 ++++++++++++++++++ .../relay-futil/tests/batch_matmul.relay | 6 + .../tests/data/batch_matmul.expect | 161 +++++++++++++ .../relay-futil/tests/data/batch_matmul.relay | 5 + .../tests/data/batch_matmul.relay.data | 14 ++ 8 files changed, 461 insertions(+), 41 deletions(-) create mode 100644 frontends/relay-futil/tests/batch_matmul.expect create mode 100644 frontends/relay-futil/tests/batch_matmul.relay create mode 100644 frontends/relay-futil/tests/data/batch_matmul.expect create mode 100644 frontends/relay-futil/tests/data/batch_matmul.relay create mode 100644 frontends/relay-futil/tests/data/batch_matmul.relay.data diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 7aa7321589..5f922522ae 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -78,6 +78,9 @@ def get_dahlia_declaration(self, function_name, cells, args): if input_type == PrimitiveType.Memory3D: function = tensor3d_batch_flatten name = self.relay_id(f'{function.__name__}') + elif function_name == "nn.batch_matmul": + function = batch_matmul + name = self.relay_id(f'{function.__name__}') assert function != None and name != None, f'{function_name} with type {input_type} is not supported.' return DahliaDeclaration(component_name=name, decl_name=self.id(name), op=op, inputs=args, function=function) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index fe16e71d98..450e914185 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -12,7 +12,7 @@ def lower_dahlia_program(prog, component_name): ''' Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, and applies the `externalize` pass. 
This pass exposes the inputs and outputs of primitive types that are - declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the component. + declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the respective component. ''' program_string = "" for line in prog.splitlines(): program_string += f'{line}\n' @@ -34,17 +34,12 @@ def lower_dahlia_program(prog, component_name): def tensor1d_op(declaration): - op1 = declaration.inputs[0].primitive - op2 = declaration.inputs[1].primitive - res = declaration.output.primitive + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive assert op1.type == PrimitiveType.Memory1D and op1.type == op2.type and op2.type == res.type - assert op1.data[0] == op2.data[0] and op1.data[0] == res.data[0] - assert op1.data[1] == op2.data[1] and op2.data[1] == res.data[1] - assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] - bitwidth = op1.data[0] - size = op1.data[1] - index_size = op1.data[2] + assert op1.data[0] == op2.data[0] and op1.data[0] == res.data[0] and op1.data[1] == op2.data[1] + assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] and op2.data[1] == res.data[1] + bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] return lower_dahlia_program(f""" decl {op1.name}: ubit<{bitwidth}>[{size}]; decl {op2.name}: ubit<{bitwidth}>[{size}]; @@ -55,22 +50,13 @@ def tensor1d_op(declaration): def tensor2d_op(declaration): - op1 = declaration.inputs[0].primitive - op2 = declaration.inputs[1].primitive - res = declaration.output.primitive - + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + bitwidth, size0, size1, index_size0, index_size1 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] assert op1.type == PrimitiveType.Memory2D and op1.type == op2.type and op2.type == res.type - assert op1.data[0] == 
op2.data[0] and op1.data[0] == res.data[0] - assert op1.data[1] == op2.data[1] and op2.data[1] == res.data[1] - assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] - assert op1.data[3] == op2.data[3] and op2.data[3] == res.data[3] - assert op1.data[4] == op2.data[4] and op2.data[4] == res.data[4] - - bitwidth = op1.data[0] - size0 = op1.data[1] - size1 = op1.data[2] - index_size0 = op1.data[3] - index_size1 = op1.data[4] + assert bitwidth == op2.data[0] and op1.data[0] == res.data[0] and op2.data[4] == res.data[4] + assert size0 == op2.data[1] and op2.data[1] == res.data[1] and size1 == op2.data[2] and op2.data[2] == res.data[2] + assert index_size0 == op2.data[3] and op2.data[3] == res.data[3] and index_size1 == op2.data[4] + return lower_dahlia_program(f""" decl {op1.name}: ubit<{bitwidth}>[{size0}][{size1}]; decl {op2.name}: ubit<{bitwidth}>[{size0}][{size1}]; @@ -83,21 +69,12 @@ def tensor2d_op(declaration): def tensor3d_batch_flatten(declaration): - op1 = declaration.inputs[0].primitive - res = declaration.output.primitive - - bitwidth = op1.data[0] - op1_size0 = op1.data[1] - op1_size1 = op1.data[2] - op1_size2 = op1.data[3] - op1_index_size0 = op1.data[4] - op1_index_size1 = op1.data[5] - op1_index_size2 = op1.data[6] - res_bitwidth = res.data[0] - res_size0 = res.data[1] - res_size1 = res.data[2] - res_index_size0 = res.data[3] - res_index_size1 = res.data[4] + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" + op1, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, op1_size0, op1_size1, op1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] + op1_index_size0, op1_index_size1, op1_index_size2 = op1.data[4], op1.data[5], op1.data[6] + res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] + res_index_size0, res_index_size1 = res.data[3], res.data[4] assert op1.type == PrimitiveType.Memory3D and res_size1 == op1_size1 * op1_size2 and res_size0 == 
op1_size0 assert res.type == PrimitiveType.Memory2D and res_bitwidth == bitwidth @@ -113,3 +90,31 @@ def tensor3d_batch_flatten(declaration): }} }} }}""", declaration.component_name) + + +def batch_matmul(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" + assert False, "Unimplemented. nn.batch_matmul currently does not execute properly." + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + bitwidth, M1_size0, M1_size1, M1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] + M1_index_size0, M1_index_size1, M1_index_size2 = op1.data[4], op1.data[5], op1.data[6] + M2_size0, M2_size1, M2_size2 = op2.data[1], op2.data[2], op2.data[3] + M2_index_size0, M2_index_size1, M2_index_size2 = op2.data[4], op2.data[5], op2.data[6] + assert op1.type == PrimitiveType.Memory3D and op1.type == op2.type and op2.type == res.type + assert M2_size1 == M1_size2 and bitwidth == op2.data[0] and M1_size0 == M2_size0 + + return lower_dahlia_program(f""" + decl {op1.name}: ubit<{bitwidth}>[{M1_size0}][{M1_size1}][{M1_size2}]; + decl {op2.name}: ubit<{bitwidth}>[{M2_size0}][{M2_size1}][{M2_size2}]; + decl {res.name}: ubit<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size2}]; + for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let j: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + for (let k: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ + for (let l: ubit<{M1_index_size2}> = 0..{M1_size2}) {{ + let prod = {op1.name}[i][j][l] * {op2.name}[i][l][k]; + }} combine {{ + {res.name}[i][j][k] += prod; + }} + }} + }} + }}""", declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 9d0c06d8f8..d5f3d15011 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -27,6 +27,14 @@ def batch_flatten(): return relay.Function([x], relay.nn.batch_flatten(x)) +def batch_matmul(): + """Add together two 
2-dimensional tensors in Relay. + """ + x = relay.var("x", relay.TensorType((1, 3, 2), "int32")) + y = relay.var("y", relay.TensorType((1, 2, 3), "int32")) + return relay.Function([x, y], relay.nn.batch_matmul(x, y)) + + def mlp_net(): """The MLP test from Relay. """ @@ -34,7 +42,7 @@ def mlp_net(): return mlp.get_net(1) -ALL_FUNCS = [add, tensor_add, batch_flatten, mlp_net] +ALL_FUNCS = [add, tensor_add, batch_flatten, batch_matmul, mlp_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/tests/batch_matmul.expect b/frontends/relay-futil/tests/batch_matmul.expect new file mode 100644 index 0000000000..7e2e4c3cc9 --- /dev/null +++ b/frontends/relay-futil/tests/batch_matmul.expect @@ -0,0 +1,218 @@ +import "primitives/std.lib"; + +component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 2, a0_0_0_addr1: 2, a0_0_0_addr2: 2, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 2, b0_0_0_addr1: 2, b0_0_0_addr2: 2, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 2, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { + cells { + a_read0_0 = prim std_reg(32); + add0 = prim std_add(32); + add1 = prim std_add(2); + add2 = prim std_add(2); + add3 = prim std_add(2); + add4 = prim std_add(2); + b_read0_0 = prim std_reg(32); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(2, 0); + const1 = prim std_const(2, 2); + const10 = prim std_const(2, 1); + const11 = prim std_const(2, 1); + const2 = prim std_const(2, 0); + const3 = prim std_const(2, 2); + const4 = prim std_const(2, 0); + const5 = prim std_const(2, 2); + const6 = prim std_const(2, 0); + const7 = prim std_const(2, 2); + const8 = prim std_const(2, 1); + const9 = prim std_const(2, 1); + i0 = prim std_reg(2); + j0 = prim std_reg(2); + k0 = prim std_reg(2); + l0 = prim 
std_reg(2); + le0 = prim std_le(2); + le1 = prim std_le(2); + le2 = prim std_le(2); + le3 = prim std_le(2); + mult_pipe0 = prim std_mult_pipe(32); + prod_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = l0.out; + le3.right = const7.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + k0.in = const4.out; + k0.write_en = 1'd1; + let2[done] = k0.done; + } + group let3<"static"=1> { + l0.in = const6.out; + l0.write_en = 1'd1; + let3[done] = l0.done; + } + group let4<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let4[done] = bin_read0_0.done; + mult_pipe0.left = a_read0_0.out; + mult_pipe0.right = b_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let5<"static"=1> { + prod_0.in = bin_read0_0.out; + prod_0.write_en = 1'd1; + let5[done] = prod_0.done; + } + group upd0<"static"=1> { + a_read0_0.write_en = 1'd1; + a0_0_0_addr2 = l0.out; + a0_0_0_addr1 = j0.out; + a0_0_0_addr0 = i0.out; + a_read0_0.in = 1'd1 ? a0_0_0_read_data; + upd0[done] = a_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + b_read0_0.write_en = 1'd1; + b0_0_0_addr2 = k0.out; + b0_0_0_addr1 = l0.out; + b0_0_0_addr0 = i0.out; + b_read0_0.in = 1'd1 ? b0_0_0_read_data; + upd1[done] = b_read0_0.done ? 
1'd1; + } + group upd2<"static"=1> { + x0_0_0_addr2 = k0.out; + x0_0_0_addr1 = j0.out; + x0_0_0_addr0 = i0.out; + x0_0_0_write_en = 1'd1; + add0.left = x0_0_0_read_data; + add0.right = prod_0.out; + x0_0_0_addr2 = k0.out; + x0_0_0_addr1 = j0.out; + x0_0_0_addr0 = i0.out; + x0_0_0_write_data = 1'd1 ? add0.out; + upd2[done] = x0_0_0_done ? 1'd1; + } + group upd3<"static"=1> { + l0.write_en = 1'd1; + add1.left = l0.out; + add1.right = const8.out; + l0.in = 1'd1 ? add1.out; + upd3[done] = l0.done ? 1'd1; + } + group upd4<"static"=1> { + k0.write_en = 1'd1; + add2.left = k0.out; + add2.right = const9.out; + k0.in = 1'd1 ? add2.out; + upd4[done] = k0.done ? 1'd1; + } + group upd5<"static"=1> { + j0.write_en = 1'd1; + add3.left = j0.out; + add3.right = const10.out; + j0.in = 1'd1 ? add3.out; + upd5[done] = j0.done ? 1'd1; + } + group upd6<"static"=1> { + i0.write_en = 1'd1; + add4.left = i0.out; + add4.right = const11.out; + i0.in = 1'd1 ? add4.out; + upd6[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + par { + upd0; + upd1; + } + let4; + let5; + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + upd6; + } + } + } + } +} + +component main () -> () { + cells { + x = prim std_mem_d3(32, 3, 3, 3, 2, 2, 2); + a = prim std_mem_d3(32, 3, 3, 3, 2, 2, 2); + b = prim std_mem_d3(32, 3, 3, 3, 2, 2, 2); + batch_matmul0 = batch_matmul; + } + wires { + group run_batch_matmul { + a.addr0 = batch_matmul0.a0_0_0_addr0; + batch_matmul0.a0_0_0_read_data = a.read_data; + a.addr1 = batch_matmul0.a0_0_0_addr1; + a.addr2 = batch_matmul0.a0_0_0_addr2; + b.addr0 = batch_matmul0.b0_0_0_addr0; + batch_matmul0.b0_0_0_read_data = b.read_data; + b.addr1 = batch_matmul0.b0_0_0_addr1; + b.addr2 = batch_matmul0.b0_0_0_addr2; + x.addr0 = batch_matmul0.x0_0_0_addr0; + x.addr1 = batch_matmul0.x0_0_0_addr1; + x.addr2 = 
batch_matmul0.x0_0_0_addr2; + x.write_data = batch_matmul0.x0_0_0_write_data; + x.write_en = batch_matmul0.x0_0_0_write_en; + batch_matmul0.x0_0_0_done = x.done; + batch_matmul0.go = 1'd1; + run_batch_matmul[done] = batch_matmul0.done ? 1'd1; + } + } + control { + seq { + run_batch_matmul; + } + } +} diff --git a/frontends/relay-futil/tests/batch_matmul.relay b/frontends/relay-futil/tests/batch_matmul.relay new file mode 100644 index 0000000000..3c33743956 --- /dev/null +++ b/frontends/relay-futil/tests/batch_matmul.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%a: Tensor[(3, 3, 3), int32], %b: Tensor[(3, 3, 3), int32]) -> Tensor[(3, 3, 3), int32] { + let %x: Tensor[(3, 3, 3), int32] = nn.batch_matmul(%a, %b); + %x +} + diff --git a/frontends/relay-futil/tests/data/batch_matmul.expect b/frontends/relay-futil/tests/data/batch_matmul.expect new file mode 100644 index 0000000000..f8d5eb52f9 --- /dev/null +++ b/frontends/relay-futil/tests/data/batch_matmul.expect @@ -0,0 +1,161 @@ +{ + "a": [ + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ] + ], + "b": [ + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ] + ], + "x": [ + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/batch_matmul.relay b/frontends/relay-futil/tests/data/batch_matmul.relay new file mode 100644 index 0000000000..20f860a2b7 --- /dev/null +++ b/frontends/relay-futil/tests/data/batch_matmul.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%a: 
Tensor[(3, 3, 3), int32], %b: Tensor[(3, 3, 3), int32]) -> Tensor[(3, 3, 3), int32] { + let %x: Tensor[(3, 3, 3), int32] = nn.batch_matmul(%a, %b); + %x +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/batch_matmul.relay.data b/frontends/relay-futil/tests/data/batch_matmul.relay.data new file mode 100644 index 0000000000..172b2d3ac2 --- /dev/null +++ b/frontends/relay-futil/tests/data/batch_matmul.relay.data @@ -0,0 +1,14 @@ +{ + "a": { + "data": [[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], + "bitwidth": 32 + }, + "b": { + "data": [[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], + "bitwidth": 32 + }, + "x": { + "data": [[[0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0]]], + "bitwidth": 32 + } +} \ No newline at end of file From d73ace84367bd55e7e73f5589f01e92a7e172999 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 6 Nov 2020 18:30:42 -0500 Subject: [PATCH 10/75] [Relay] Add actual expect for matrix multiply. 
--- .../tests/data/batch_matmul.expect | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/frontends/relay-futil/tests/data/batch_matmul.expect b/frontends/relay-futil/tests/data/batch_matmul.expect index f8d5eb52f9..f5bbcf1366 100644 --- a/frontends/relay-futil/tests/data/batch_matmul.expect +++ b/frontends/relay-futil/tests/data/batch_matmul.expect @@ -108,53 +108,53 @@ "x": [ [ [ - 1, - 1, - 1 + 3, + 3, + 3 ], [ - 1, - 1, - 1 + 3, + 3, + 3 ], [ - 1, - 1, - 1 + 3, + 3, + 3 ] ], [ [ - 1, - 1, - 1 + 3, + 3, + 3 ], [ - 1, - 1, - 1 + 3, + 3, + 3 ], [ - 1, - 1, - 1 + 3, + 3, + 3 ] ], [ [ - 1, - 1, - 1 + 3, + 3, + 3 ], [ - 1, - 1, - 1 + 3, + 3, + 3 ], [ - 1, - 1, - 1 + 3, + 3, + 3 ] ] ] From 57f85be9568af360cd53e3292fd73615e225cde8 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 6 Nov 2020 21:52:44 -0500 Subject: [PATCH 11/75] Add succinct example. --- frontends/relay-futil/dahlia_functions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 450e914185..6ed7a6df5c 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -13,6 +13,25 @@ def lower_dahlia_program(prog, component_name): Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the respective component. + + Example: + ------ Dahlia, component name: ProcessX ------ + decl X: ubit<32>[4]; + ... + + ------------- Lower to FuTIL ----------------- + component ProcessX() -> () { + X = prim std_mem_d1_ext(32, 4, 2); + ... 
+ } + + ------------- Externalize Pass --------------- + component ProcessX + (go: 1, clk: 1, X0_read_data: 32, X0_done: 1) -> + (done: 1, X0_addr0: 2, X0_write_data: 32, X0_write_en: 1, X0_clk: 1) { + ... + } + ''' program_string = "" for line in prog.splitlines(): program_string += f'{line}\n' From d87b5f3e4db1725b7561c6acb947116240194360 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 12 Nov 2020 10:17:37 -0500 Subject: [PATCH 12/75] [relay] Add more functions, cleanup. --- frontends/relay-futil/README.md | 8 +- frontends/relay-futil/compiler.py | 66 ++- frontends/relay-futil/dahlia_functions.py | 156 +++++-- frontends/relay-futil/example.py | 61 +-- frontends/relay-futil/futil_ast.py | 3 +- frontends/relay-futil/pretty_print.py | 20 +- frontends/relay-futil/runt.toml | 2 +- .../relay-futil/tests/batch_matmul.expect | 430 +++++++++++++----- .../relay-futil/tests/batch_matmul.relay | 4 +- frontends/relay-futil/tests/bias_add.expect | 131 ++++++ frontends/relay-futil/tests/bias_add.relay | 7 + ...ch_flatten.expect => batch_flatten.expect} | 0 ...atch_flatten.relay => batch_flatten.relay} | 0 ...en.relay.data => batch_flatten.relay.data} | 0 .../tests/data/batch_matmul.expect | 141 +++--- .../relay-futil/tests/data/batch_matmul.relay | 4 +- .../tests/data/batch_matmul.relay.data | 14 +- .../relay-futil/tests/data/bias_add.expect | 36 ++ .../relay-futil/tests/data/bias_add.relay | 5 + .../tests/data/bias_add.relay.data | 14 + .../relay-futil/tests/fixed_point_add.expect | 98 ++++ .../relay-futil/tests/fixed_point_add.relay | 5 + frontends/relay-futil/tests/let3.expect | 52 ++- frontends/relay-futil/tests/let3.relay | 2 +- frontends/relay-futil/tests/relu.expect | 152 +++++++ frontends/relay-futil/tests/relu.relay | 6 + frontends/relay-futil/utilities.py | 59 ++- 27 files changed, 1115 insertions(+), 361 deletions(-) create mode 100644 frontends/relay-futil/tests/bias_add.expect create mode 100644 frontends/relay-futil/tests/bias_add.relay rename 
frontends/relay-futil/tests/data/{tensor3d_batch_flatten.expect => batch_flatten.expect} (100%) rename frontends/relay-futil/tests/data/{tensor3d_batch_flatten.relay => batch_flatten.relay} (100%) rename frontends/relay-futil/tests/data/{tensor3d_batch_flatten.relay.data => batch_flatten.relay.data} (100%) create mode 100644 frontends/relay-futil/tests/data/bias_add.expect create mode 100644 frontends/relay-futil/tests/data/bias_add.relay create mode 100644 frontends/relay-futil/tests/data/bias_add.relay.data create mode 100644 frontends/relay-futil/tests/fixed_point_add.expect create mode 100644 frontends/relay-futil/tests/fixed_point_add.relay create mode 100644 frontends/relay-futil/tests/relu.expect create mode 100644 frontends/relay-futil/tests/relu.relay diff --git a/frontends/relay-futil/README.md b/frontends/relay-futil/README.md index 848f4abd3d..a47b0b3e0b 100644 --- a/frontends/relay-futil/README.md +++ b/frontends/relay-futil/README.md @@ -31,7 +31,11 @@ Installation cd ../topi/python && python3 setup.py bdist_wheel pip3 install --user dist/topi-*.whl -7. Install [Dahlia][], which is used when lowering from Relay to FuTIL. +7. To run the [MLP net][] and [VGG net][] examples, install `pytest`: + + pip3 install pytest + +8. Install [Dahlia][], which is used when lowering from Relay to FuTIL. The `fuse` executable is expected to be on your path. Alternatively, it will check to see if the environment variable `$DAHLIA_EXEC` is set. Run an Example @@ -60,6 +64,8 @@ The Relay text format parser requires ANTLR, so also do this: Then, just type `runt` to run the tests. 
+[vgg net]: https://github.com/apache/incubator-tvm/blob/main/python/tvm/relay/testing/vgg.py +[mlp net]: https://github.com/apache/incubator-tvm/blob/main/python/tvm/relay/testing/mlp.py [dahlia]: https://github.com/cucapra/dahlia#set-it-up [tvm]: https://tvm.apache.org [tvm-install]: https://tvm.apache.org/docs/install/from_source.html#developers-get-source-from-github diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 5f922522ae..2b66ac0f7e 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -9,7 +9,7 @@ from dahlia_functions import * # Mapping from Relay binary calls to the respective Dahlia operator. -BuiltInBinaryCalls = {'add': '+', 'multiply': '*', 'subtract': '-'} +BuiltInBinaryCalls = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} class Relay2Futil(ExprFunctor): @@ -69,81 +69,71 @@ def get_dahlia_declaration(self, function_name, cells, args): if function_name in BuiltInBinaryCalls: op = BuiltInBinaryCalls[function_name] if input_type == PrimitiveType.Memory1D: - function = tensor1d_op - name = self.relay_id(f'tensor1d_{function_name}') - if input_type == PrimitiveType.Memory2D: - function = tensor2d_op - name = self.relay_id(f'tensor2d_{function_name}') + function, name = tensor1d_op, f'tensor1d_{function_name}' + elif input_type == PrimitiveType.Memory2D: + function, name = tensor2d_op, f'tensor2d_{function_name}' + if function_name == "nn.batch_flatten": - if input_type == PrimitiveType.Memory3D: - function = tensor3d_batch_flatten - name = self.relay_id(f'{function.__name__}') + if input_type == PrimitiveType.Memory3D: function = tensor3d_batch_flatten elif function_name == "nn.batch_matmul": function = batch_matmul - name = self.relay_id(f'{function.__name__}') + elif function_name == "nn.bias_add": + if input_type == PrimitiveType.Memory2D: function = tensor2d_bias_add + elif function_name == "nn.relu": + if input_type == PrimitiveType.Memory2D: function = 
tensor2d_relu - assert function != None and name != None, f'{function_name} with type {input_type} is not supported.' - return DahliaDeclaration(component_name=name, decl_name=self.id(name), op=op, inputs=args, function=function) + assert function != None, f'{function_name} with type {input_type} is not supported.' + if name == None: name = function.__name__ + return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), op=op, inputs=args, + function=function) def visit_var(self, var): name = self.relay_id(var.name_hint) - if self.main.contains_primitive(name): return [cell] - - data, type = get_memory_parameters(var.type_annotation) + # Do not add duplicate primitives to main. + if self.main.contains_primitive(name): return cell + data, type, data_type = get_memory_parameters(var.type_annotation) dahlia_name = self.produce_dahlia_name(name, type) - return [FCell(dahlia_name=dahlia_name, primitive=FPrimitive(name=name, data=data, type=type))] + return FCell(dahlia_name=dahlia_name, + primitive=FPrimitive(name=name, data=data, data_type=data_type, type=type)) def visit_let(self, let): - variable = self.visit(let.var) - body = self.visit(let.body) - values = self.visit(let.value) - - output = variable[0] - for value in flatten(values): + output, body, values = self.visit(let.var), self.visit(let.body), self.visit(let.value) + for value in values: if not value.is_dahlia_declaration(): continue value.dahlia_declaration.output = output value.dahlia_declaration.invoke() return [body, values] def visit_constant(self, const): - type = const.data.dtype - shape = const.data.shape - data = [get_bitwidth(type), int(const.data.asnumpy())] - name = self.id("const") - return [FCell(primitive=FPrimitive(name=name, data=data, type=PrimitiveType.Constant))] + type, shape = const.data.dtype, const.data.shape + name, data, data_type = self.id("const"), [get_bitwidth(type), int(const.data.asnumpy())], get_type(type) + return 
FCell(primitive=FPrimitive(name=name, data=data, data_type=data_type, type=PrimitiveType.Constant)) def visit_call(self, call): - cells = [] - args = [] + cells, args = [], [] for arg in call.args: argument = self.visit(arg) cells.append(argument) args.append(argument) - cells = flatten(cells) - cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, flatten(args)))) + cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, args))) return cells def visit_function(self, function): body = self.visit(function.body) - for cell in flatten(body): self.main.add_cell(cell) if not cell.is_dahlia_declaration(): continue self.dahlia_components.append(cell.dahlia_declaration.program) - - build_main(self.main) # Groups, wires, connections. + build_main_controls(self.main) return pp_component(self.main) def infer_type(expr: Function) -> Function: infer_types_pass = relay.transform.InferType() - fuse_op__pass = relay.transform.FuseOps() - to_normal_pass = relay.transform.ToANormalForm() mod = ir.IRModule() mod['main'] = expr mod = infer_types_pass(mod) - ret = mod['main'] - return ret + return mod['main'] def compile(program) -> str: diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 6ed7a6df5c..fd0b3db770 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -33,20 +33,14 @@ def lower_dahlia_program(prog, component_name): } ''' - program_string = "" - for line in prog.splitlines(): program_string += f'{line}\n' - + program_string = '\n'.join(prog.splitlines()) with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: tf0.write(bytes(program_string, 'UTF-8')) - tf0.seek(0) - tf1.seek(0) - tf2.seek(0) + tf0.seek(0), tf1.seek(0), tf2.seek(0) fuse_binary = os.environ['DAHLIA_EXEC'] if 'DAHLIA_EXEC' in os.environ else 'fuse' - command = \ - f""" - {fuse_binary} {tf0.name} --lower 
-b=futil -n={component_name} > {tf1.name} {NO_ERR} \ - && cd ../../ && cargo run -- {tf1.name} -p externalize > {tf2.name} {NO_ERR} - """ + command = f""" + {fuse_binary} {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {NO_ERR} \ + && cargo run -- {tf1.name} -l ../../ -p externalize > {tf2.name} {NO_ERR}""" subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() component = tf2.read().decode()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. return component @@ -55,17 +49,19 @@ def lower_dahlia_program(prog, component_name): def tensor1d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + assert op1.data_type == op2.data_type and op2.data_type == res.data_type assert op1.type == PrimitiveType.Memory1D and op1.type == op2.type and op2.type == res.type assert op1.data[0] == op2.data[0] and op1.data[0] == res.data[0] and op1.data[1] == op2.data[1] assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] and op2.data[1] == res.data[1] bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] - return lower_dahlia_program(f""" - decl {op1.name}: ubit<{bitwidth}>[{size}]; - decl {op2.name}: ubit<{bitwidth}>[{size}]; - decl {res.name}: ubit<{bitwidth}>[{size}]; + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; for (let i: ubit<{index_size}> = 0..{size}) {{ {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; - }}""", declaration.component_name) + }}""" + return lower_dahlia_program(program, declaration.component_name) def tensor2d_op(declaration): @@ -75,16 +71,16 @@ def tensor2d_op(declaration): assert bitwidth == op2.data[0] and op1.data[0] == res.data[0] and op2.data[4] == res.data[4] assert size0 == op2.data[1] and op2.data[1] == res.data[1] and size1 == op2.data[2] and 
op2.data[2] == res.data[2] assert index_size0 == op2.data[3] and op2.data[3] == res.data[3] and index_size1 == op2.data[4] - - return lower_dahlia_program(f""" - decl {op1.name}: ubit<{bitwidth}>[{size0}][{size1}]; - decl {op2.name}: ubit<{bitwidth}>[{size0}][{size1}]; - decl {res.name}: ubit<{bitwidth}>[{size0}][{size1}]; + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}]; for (let i: ubit<{index_size0}> = 0..{size0}) {{ for (let j: ubit<{index_size1}> = 0..{size1}) {{ {res.name}[i][j] := {op1.name}[i][j] {declaration.op} {op2.name}[i][j]; }} - }}""", declaration.component_name) + }}""" + return lower_dahlia_program(program, declaration.component_name) def tensor3d_batch_flatten(declaration): @@ -96,10 +92,11 @@ def tensor3d_batch_flatten(declaration): res_index_size0, res_index_size1 = res.data[3], res.data[4] assert op1.type == PrimitiveType.Memory3D and res_size1 == op1_size1 * op1_size2 and res_size0 == op1_size0 - assert res.type == PrimitiveType.Memory2D and res_bitwidth == bitwidth - return lower_dahlia_program(f""" - decl {op1.name}: ubit<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; - decl {res.name}: ubit<{bitwidth}>[{res_size0}][{res_size1}]; + assert res.type == PrimitiveType.Memory2D and res_bitwidth == bitwidth and op1.data_type == res.data_type + assert op1.data_type == res.data_type + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; let l: ubit<{res_index_size1}> = 0; for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ @@ -108,32 +105,109 @@ def tensor3d_batch_flatten(declaration): l := l + 1; }} }} - }}""", declaration.component_name) + }}""" + return lower_dahlia_program(program, 
declaration.component_name) + + +def tensor2d_bias_add(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" + # Assumes default value axis=1 is passed in. + data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + bitwidth = data.data[0] + size0, size1, index_size0, index_size1 = data.data[1], data.data[2], data.data[3], data.data[4] + bias_size, bias_index_size = bias.data[1], bias.data[2] + + assert bitwidth == res.data[0] and bitwidth == bias.data[0] + assert size0 == res.data[1] and size1 == res.data[2] and bias_size == size1 + assert bias.type == PrimitiveType.Memory1D and data.type == PrimitiveType.Memory2D and data.type == res.type + program = f""" + decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}]; + decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}]; + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + {res.name}[i][j] := {data.name}[i][j] + {bias.name}[j]; + }} + }} + """ + return lower_dahlia_program(program, declaration.component_name) + + +# TODO(cgyurgyik): +# 1. This won't work for fixed point currently, since Dahlia +# will not take fixed point operands for the `>` operator. +# 2. Without signed bit array support, this is also meaningless. +def tensor2d_relu(declaration): + op1, res = declaration.inputs[0].primitive, declaration.output.primitive + assert res.data_type == 'ubit', f'{res.data_type} is not currently supported for ReLU.' 
+ bitwidth, op1_size0, op1_size1 = op1.data[0], op1.data[1], op1.data[2] + op1_index_size0, op1_index_size1 = op1.data[3], op1.data[4] + res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] + res_index_size0, res_index_size1 = res.data[3], res.data[4] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; + let zero: {op1.data_type}<{bitwidth}> = 0; + for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ + for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ + if ({op1.name}[i][j] > zero) {{ + {res.name}[i][j] := {op1.name}[i][j]; + }} else {{ + {res.name}[i][j] := 0; + }} + }} + }} + """ + return lower_dahlia_program(program, declaration.component_name) def batch_matmul(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" - assert False, "Unimplemented. nn.batch_matmul currently does not execute properly." 
op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth, M1_size0, M1_size1, M1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] M1_index_size0, M1_index_size1, M1_index_size2 = op1.data[4], op1.data[5], op1.data[6] M2_size0, M2_size1, M2_size2 = op2.data[1], op2.data[2], op2.data[3] M2_index_size0, M2_index_size1, M2_index_size2 = op2.data[4], op2.data[5], op2.data[6] assert op1.type == PrimitiveType.Memory3D and op1.type == op2.type and op2.type == res.type - assert M2_size1 == M1_size2 and bitwidth == op2.data[0] and M1_size0 == M2_size0 - - return lower_dahlia_program(f""" - decl {op1.name}: ubit<{bitwidth}>[{M1_size0}][{M1_size1}][{M1_size2}]; - decl {op2.name}: ubit<{bitwidth}>[{M2_size0}][{M2_size1}][{M2_size2}]; - decl {res.name}: ubit<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size2}]; - for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ - for (let j: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ - for (let k: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ - for (let l: ubit<{M1_index_size2}> = 0..{M1_size2}) {{ - let prod = {op1.name}[i][j][l] * {op2.name}[i][l][k]; + assert op1.data_type == op2.data_type and op2.data_type == res.data_type + + # 1. Get transpose of second operand. + # 2. Conduct temporary = op1 * transpose(op2). + # 3. Write temporary value to return value.* + # * This third step may not be necessary, but trying to conduct the matrix multiply + # directly with the return value declared resulted in incorrect outputs. 
+ program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M1_size2}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size1}][{M2_size2}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; + let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size2}][{M2_size1}]; + let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; + for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let i: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + for (let j: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ + transpose_{op2.name}[batch][j][i] := {op2.name}[batch][i][j]; + }} + }} + }} + + for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let i: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + for (let k: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ + let product = {op1.name}[batch][i][k] * transpose_{op2.name}[batch][k][j]; }} combine {{ - {res.name}[i][j][k] += prod; + temporary_{res.name}[batch][i][j] += product; }} }} }} - }}""", declaration.component_name) + }} + + for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let i: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + {res.name}[batch][i][j] := temporary_{res.name}[batch][i][j]; + }} + }} + }} + """ + return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index d5f3d15011..d0f2ebd730 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -5,63 +5,72 @@ def add(): - """Add together two variables in Relay. - """ x = relay.var('x', shape=(), dtype="int32") y = relay.var('y', shape=(), dtype="int32") return relay.Function([x, y], relay.add(x, y)) -def tensor_add(): - """Add together two 2-dimensional tensors in Relay. 
- """ +def tensor_subtract(): x = relay.var("x", relay.TensorType((2, 4), "int32")) y = relay.var("y", relay.TensorType((2, 4), "int32")) - return relay.Function([x, y], relay.add(x, y)) + return relay.Function([x, y], relay.subtract(x, y)) def batch_flatten(): - """Flattens all dimensions except for the batch dimension. - """ x = relay.var("x", relay.TensorType((2, 5, 5), "int32")) return relay.Function([x], relay.nn.batch_flatten(x)) def batch_matmul(): - """Add together two 2-dimensional tensors in Relay. - """ - x = relay.var("x", relay.TensorType((1, 3, 2), "int32")) - y = relay.var("y", relay.TensorType((1, 2, 3), "int32")) + x = relay.var('x', shape=[1, 3, 3], dtype='float32') + y = relay.var('y', shape=[1, 3, 3], dtype='float32') return relay.Function([x, y], relay.nn.batch_matmul(x, y)) +def bias_add(): + x = relay.var('x', shape=[2, 4], dtype='float32') + bias = relay.var('bias', shape=[4], dtype='float32') + return relay.Function([x, bias], relay.nn.bias_add(data=x, bias=bias)) + + +def relu(): + x = relay.var('x', shape=[2, 4], dtype='int32') + return relay.Function([x], relay.nn.relu(x)) + + def mlp_net(): - """The MLP test from Relay. 
- """ + """The MLP test from Relay.""" from tvm.relay.testing import mlp return mlp.get_net(1) -ALL_FUNCS = [add, tensor_add, batch_flatten, batch_matmul, mlp_net] +def vgg_net(): + """The VGG test from Relay.""" + from tvm.relay.testing import vgg + return vgg.get_net(batch_size=1, image_shape=(3, 224, 224), num_classes=10, dtype='int32', num_layers=11, + batch_norm=True) + + +ALL_FUNCS = [add, tensor_subtract, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) -def simple_example(): - if '-h' in sys.argv[1:]: - supported_functions = [] +def run_example(): + input = sys.argv[1:] + if '-h' in input or input == []: print("- To see FuTIL output:\n$ python3 example.py ") print("- To see Relay IR:\n$ python3 example.py -r") - print("\n- Supported function names:") - for f in FUNC_NAMES: print(f' {f}') + print("\n- Supported functions:") + (lambda x: print(', '.join(x)))(FUNC_NAMES) return func = None # See if the command line contains a function name. for option in ALL_FUNCS: - if option.__name__ in sys.argv[1:]: + if option.__name__ in input: func = option() break if func == None: - print("For help:\n$ python3 example.py -h") + print(f'Function {input} is not a supported. To see a list of functions:\n$ python3 example.py -h') return # Try optimizing the Relay IR with a few built-in passes. @@ -71,10 +80,10 @@ def simple_example(): relay.transform.ToANormalForm(), ]) - mod = tvm.IRModule.from_expr(func) - mod_opt = seq(mod) + mod_opt = tvm.IRModule.from_expr(func) + mod_opt = seq(mod_opt) func = mod_opt['main'] - if '-r' in sys.argv[1:]: + if '-r' in input: # Dump the Relay representation (for educational purposes). 
print(func) else: @@ -83,4 +92,4 @@ def simple_example(): if __name__ == '__main__': - simple_example() + run_example() diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index cef66aef0a..1d469c8888 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -26,6 +26,7 @@ class FPrimitive: name: str data: List[int] type: PrimitiveType + data_type: str @dataclass @@ -153,8 +154,6 @@ class FDeclaration: Represents a FuTIL declaration. ''' name: str - intermediary_inputs: List[Cell] = None - intermediary_output: Cell = None component: FComponent = None diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 6b8cbe4632..53eb47662d 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -61,9 +61,7 @@ def pp_component(component: FComponent): continue subcomponents.append(pp_cell(cell)) cells = mk_block("cells", '\n'.join(subcomponents)) - inputs, outputs = pp_component_signature(component) - wires = mk_block("wires", '\n'.join(pp_connections(component))) controls = "" if component.controls == None else '\n'.join(pp_control(component)) @@ -75,24 +73,27 @@ def pp_component(component: FComponent): def pp_cell(cell: FCell): if cell.is_primitive(): data = cell.primitive.data - bitwidth = str(data[0]) + data_type = cell.primitive.data_type + if data_type == 'ubit' or data_type == 'bit': bitwidth = str(data[0]) + # `fix` / `ufix` will have bitwidth form: . We only want TotalWidth. 
+ if data_type == 'ufix' or data_type == 'fix': bitwidth = str(data[0]).split(',')[0] if cell.primitive.type == PrimitiveType.Register: return f'{cell.primitive.name} = prim std_reg({bitwidth});' - elif cell.primitive.type == PrimitiveType.Constant: + if cell.primitive.type == PrimitiveType.Constant: value = str(data[1]) return f'{cell.primitive.name} = prim std_const({bitwidth}, {value});' - elif cell.primitive.type == PrimitiveType.Memory1D: + if cell.primitive.type == PrimitiveType.Memory1D: size = str(data[1]) index_size = str(data[2]) return f'{cell.primitive.name} = prim std_mem_d1({bitwidth}, {size}, {index_size});' - elif cell.primitive.type == PrimitiveType.Memory2D: + if cell.primitive.type == PrimitiveType.Memory2D: size0 = str(data[1]) size1 = str(data[2]) index_size0 = str(data[3]) index_size1 = str(data[4]) return f'{cell.primitive.name} = prim std_mem_d2({bitwidth}, ' \ f'{size0}, {size1}, {index_size0}, {index_size1});' - elif cell.primitive.type == PrimitiveType.Memory3D: + if cell.primitive.type == PrimitiveType.Memory3D: size0 = str(data[1]) size1 = str(data[2]) size2 = str(data[3]) @@ -101,11 +102,10 @@ def pp_cell(cell: FCell): index_size2 = str(data[6]) return f'{cell.primitive.name} = prim std_mem_d3({bitwidth}, ' \ f'{size0}, {size1}, {size2}, {index_size0}, {index_size1}, {index_size2});' - elif cell.primitive.type == PrimitiveType.BinOp: + if cell.primitive.type == PrimitiveType.BinOp: op = data[1] return f'{cell.primitive.name} = prim std_{op}({bitwidth});' - else: - assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' + assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' elif cell.is_declaration(): return f'{cell.declaration.name} = {cell.declaration.component.name};' elif cell.is_dahlia_declaration(): diff --git a/frontends/relay-futil/runt.toml b/frontends/relay-futil/runt.toml index 00e4dc96e6..26fd2172e8 100644 --- a/frontends/relay-futil/runt.toml +++ 
b/frontends/relay-futil/runt.toml @@ -6,7 +6,7 @@ paths = ["tests/*.relay"] cmd = "python3 compiler.py < {}" [[tests]] -name = "FuTIL to dat" +name = "Relay to FuTIL to dat" paths = ["tests/data/*.relay"] cmd = """ python3 compiler.py < {} > {}.expect && \ diff --git a/frontends/relay-futil/tests/batch_matmul.expect b/frontends/relay-futil/tests/batch_matmul.expect index 7e2e4c3cc9..0bf73d4754 100644 --- a/frontends/relay-futil/tests/batch_matmul.expect +++ b/frontends/relay-futil/tests/batch_matmul.expect @@ -1,148 +1,322 @@ import "primitives/std.lib"; -component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 2, a0_0_0_addr1: 2, a0_0_0_addr2: 2, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 2, b0_0_0_addr1: 2, b0_0_0_addr2: 2, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 2, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { +component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 3, a0_0_0_addr1: 3, a0_0_0_addr2: 3, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 3, b0_0_0_addr1: 3, b0_0_0_addr2: 3, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { cells { a_read0_0 = prim std_reg(32); - add0 = prim std_add(32); - add1 = prim std_add(2); - add2 = prim std_add(2); - add3 = prim std_add(2); - add4 = prim std_add(2); + add0 = prim std_add(3); + add1 = prim std_add(3); + add10 = prim std_add(3); + add2 = prim std_add(3); + add3 = prim std_add(32); + add4 = prim std_add(3); + add5 = prim std_add(3); + add6 = prim std_add(3); + add7 = prim std_add(3); + add8 = prim std_add(3); + add9 = prim 
std_add(3); b_read0_0 = prim std_reg(32); + batch0 = prim std_reg(3); + batch1 = prim std_reg(3); + batch2 = prim std_reg(3); bin_read0_0 = prim std_reg(32); - const0 = prim std_const(2, 0); - const1 = prim std_const(2, 2); - const10 = prim std_const(2, 1); - const11 = prim std_const(2, 1); - const2 = prim std_const(2, 0); - const3 = prim std_const(2, 2); - const4 = prim std_const(2, 0); - const5 = prim std_const(2, 2); - const6 = prim std_const(2, 0); - const7 = prim std_const(2, 2); - const8 = prim std_const(2, 1); - const9 = prim std_const(2, 1); - i0 = prim std_reg(2); - j0 = prim std_reg(2); - k0 = prim std_reg(2); - l0 = prim std_reg(2); - le0 = prim std_le(2); - le1 = prim std_le(2); - le2 = prim std_le(2); - le3 = prim std_le(2); + const0 = prim std_const(3, 0); + const1 = prim std_const(3, 3); + const10 = prim std_const(3, 3); + const11 = prim std_const(3, 0); + const12 = prim std_const(3, 6); + const13 = prim std_const(3, 0); + const14 = prim std_const(3, 6); + const15 = prim std_const(3, 0); + const16 = prim std_const(3, 4); + const17 = prim std_const(3, 1); + const18 = prim std_const(3, 1); + const19 = prim std_const(3, 1); + const2 = prim std_const(3, 0); + const20 = prim std_const(3, 1); + const21 = prim std_const(3, 0); + const22 = prim std_const(3, 3); + const23 = prim std_const(3, 0); + const24 = prim std_const(3, 6); + const25 = prim std_const(3, 0); + const26 = prim std_const(3, 6); + const27 = prim std_const(3, 1); + const28 = prim std_const(3, 1); + const29 = prim std_const(3, 1); + const3 = prim std_const(3, 6); + const4 = prim std_const(3, 0); + const5 = prim std_const(3, 4); + const6 = prim std_const(3, 1); + const7 = prim std_const(3, 1); + const8 = prim std_const(3, 1); + const9 = prim std_const(3, 0); + i0 = prim std_reg(3); + i1 = prim std_reg(3); + i2 = prim std_reg(3); + j0 = prim std_reg(3); + j1 = prim std_reg(3); + j2 = prim std_reg(3); + k0 = prim std_reg(3); + le0 = prim std_le(3); + le1 = prim std_le(3); + le2 = prim std_le(3); + 
le3 = prim std_le(3); + le4 = prim std_le(3); + le5 = prim std_le(3); + le6 = prim std_le(3); + le7 = prim std_le(3); + le8 = prim std_le(3); + le9 = prim std_le(3); mult_pipe0 = prim std_mult_pipe(32); - prod_0 = prim std_reg(32); + product_0 = prim std_reg(32); + temporary_x0_0_0 = prim std_mem_d3(32, 4, 7, 7, 3, 3, 3); + temporary_x_read0_0 = prim std_reg(32); + transpose_b0_0_0 = prim std_mem_d3(32, 4, 5, 7, 3, 3, 3); + transpose_b_read0_0 = prim std_reg(32); } wires { group cond0<"static"=0> { cond0[done] = 1'd1; - le0.left = i0.out; + le0.left = batch0.out; le0.right = const1.out; } group cond1<"static"=0> { cond1[done] = 1'd1; - le1.left = j0.out; + le1.left = i0.out; le1.right = const3.out; } group cond2<"static"=0> { cond2[done] = 1'd1; - le2.left = k0.out; + le2.left = j0.out; le2.right = const5.out; } group cond3<"static"=0> { cond3[done] = 1'd1; - le3.left = l0.out; - le3.right = const7.out; + le3.left = batch1.out; + le3.right = const10.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = i1.out; + le4.right = const12.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = j1.out; + le5.right = const14.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = k0.out; + le6.right = const16.out; + } + group cond7<"static"=0> { + cond7[done] = 1'd1; + le7.left = batch2.out; + le7.right = const22.out; + } + group cond8<"static"=0> { + cond8[done] = 1'd1; + le8.left = i2.out; + le8.right = const24.out; + } + group cond9<"static"=0> { + cond9[done] = 1'd1; + le9.left = j2.out; + le9.right = const26.out; } group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; + batch0.in = const0.out; + batch0.write_en = 1'd1; + let0[done] = batch0.done; } group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; + i0.in = const2.out; + i0.write_en = 1'd1; + let1[done] = i0.done; + } + group let10<"static"=1> { + i2.in = const23.out; + i2.write_en = 1'd1; 
+ let10[done] = i2.done; + } + group let11<"static"=1> { + j2.in = const25.out; + j2.write_en = 1'd1; + let11[done] = j2.done; } group let2<"static"=1> { - k0.in = const4.out; - k0.write_en = 1'd1; - let2[done] = k0.done; + j0.in = const4.out; + j0.write_en = 1'd1; + let2[done] = j0.done; } group let3<"static"=1> { - l0.in = const6.out; - l0.write_en = 1'd1; - let3[done] = l0.done; + batch1.in = const9.out; + batch1.write_en = 1'd1; + let3[done] = batch1.done; + } + group let4<"static"=1> { + i1.in = const11.out; + i1.write_en = 1'd1; + let4[done] = i1.done; + } + group let5<"static"=1> { + j1.in = const13.out; + j1.write_en = 1'd1; + let5[done] = j1.done; } - group let4<"static"=4> { + group let6<"static"=1> { + k0.in = const15.out; + k0.write_en = 1'd1; + let6[done] = k0.done; + } + group let7<"static"=4> { bin_read0_0.in = mult_pipe0.out; bin_read0_0.write_en = mult_pipe0.done; - let4[done] = bin_read0_0.done; + let7[done] = bin_read0_0.done; mult_pipe0.left = a_read0_0.out; - mult_pipe0.right = b_read0_0.out; + mult_pipe0.right = transpose_b_read0_0.out; mult_pipe0.go = !mult_pipe0.done ? 1'd1; } - group let5<"static"=1> { - prod_0.in = bin_read0_0.out; - prod_0.write_en = 1'd1; - let5[done] = prod_0.done; + group let8<"static"=1> { + product_0.in = bin_read0_0.out; + product_0.write_en = 1'd1; + let8[done] = product_0.done; } - group upd0<"static"=1> { - a_read0_0.write_en = 1'd1; - a0_0_0_addr2 = l0.out; - a0_0_0_addr1 = j0.out; - a0_0_0_addr0 = i0.out; - a_read0_0.in = 1'd1 ? a0_0_0_read_data; - upd0[done] = a_read0_0.done ? 1'd1; + group let9<"static"=1> { + batch2.in = const21.out; + batch2.write_en = 1'd1; + let9[done] = batch2.done; } - group upd1<"static"=1> { + group upd0<"static"=1> { b_read0_0.write_en = 1'd1; - b0_0_0_addr2 = k0.out; - b0_0_0_addr1 = l0.out; - b0_0_0_addr0 = i0.out; + b0_0_0_addr2 = j0.out; + b0_0_0_addr1 = i0.out; + b0_0_0_addr0 = batch0.out; b_read0_0.in = 1'd1 ? b0_0_0_read_data; - upd1[done] = b_read0_0.done ? 
1'd1; + upd0[done] = b_read0_0.done ? 1'd1; } - group upd2<"static"=1> { - x0_0_0_addr2 = k0.out; - x0_0_0_addr1 = j0.out; - x0_0_0_addr0 = i0.out; + group upd1<"static"=1> { + transpose_b0_0_0.addr2 = i0.out; + transpose_b0_0_0.addr1 = j0.out; + transpose_b0_0_0.addr0 = batch0.out; + transpose_b0_0_0.write_en = 1'd1; + transpose_b0_0_0.write_data = 1'd1 ? b_read0_0.out; + upd1[done] = transpose_b0_0_0.done ? 1'd1; + } + group upd10<"static"=1> { + i1.write_en = 1'd1; + add6.left = i1.out; + add6.right = const19.out; + i1.in = 1'd1 ? add6.out; + upd10[done] = i1.done ? 1'd1; + } + group upd11<"static"=1> { + batch1.write_en = 1'd1; + add7.left = batch1.out; + add7.right = const20.out; + batch1.in = 1'd1 ? add7.out; + upd11[done] = batch1.done ? 1'd1; + } + group upd12<"static"=1> { + temporary_x_read0_0.write_en = 1'd1; + temporary_x0_0_0.addr2 = j2.out; + temporary_x0_0_0.addr1 = i2.out; + temporary_x0_0_0.addr0 = batch2.out; + temporary_x_read0_0.in = 1'd1 ? temporary_x0_0_0.read_data; + upd12[done] = temporary_x_read0_0.done ? 1'd1; + } + group upd13<"static"=1> { + x0_0_0_addr2 = j2.out; + x0_0_0_addr1 = i2.out; + x0_0_0_addr0 = batch2.out; x0_0_0_write_en = 1'd1; - add0.left = x0_0_0_read_data; - add0.right = prod_0.out; - x0_0_0_addr2 = k0.out; - x0_0_0_addr1 = j0.out; - x0_0_0_addr0 = i0.out; - x0_0_0_write_data = 1'd1 ? add0.out; - upd2[done] = x0_0_0_done ? 1'd1; + x0_0_0_write_data = 1'd1 ? temporary_x_read0_0.out; + upd13[done] = x0_0_0_done ? 1'd1; + } + group upd14<"static"=1> { + j2.write_en = 1'd1; + add8.left = j2.out; + add8.right = const27.out; + j2.in = 1'd1 ? add8.out; + upd14[done] = j2.done ? 1'd1; + } + group upd15<"static"=1> { + i2.write_en = 1'd1; + add9.left = i2.out; + add9.right = const28.out; + i2.in = 1'd1 ? add9.out; + upd15[done] = i2.done ? 1'd1; + } + group upd16<"static"=1> { + batch2.write_en = 1'd1; + add10.left = batch2.out; + add10.right = const29.out; + batch2.in = 1'd1 ? add10.out; + upd16[done] = batch2.done ? 
1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const6.out; + j0.in = 1'd1 ? add0.out; + upd2[done] = j0.done ? 1'd1; } group upd3<"static"=1> { - l0.write_en = 1'd1; - add1.left = l0.out; - add1.right = const8.out; - l0.in = 1'd1 ? add1.out; - upd3[done] = l0.done ? 1'd1; + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const7.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; } group upd4<"static"=1> { - k0.write_en = 1'd1; - add2.left = k0.out; - add2.right = const9.out; - k0.in = 1'd1 ? add2.out; - upd4[done] = k0.done ? 1'd1; + batch0.write_en = 1'd1; + add2.left = batch0.out; + add2.right = const8.out; + batch0.in = 1'd1 ? add2.out; + upd4[done] = batch0.done ? 1'd1; } group upd5<"static"=1> { - j0.write_en = 1'd1; - add3.left = j0.out; - add3.right = const10.out; - j0.in = 1'd1 ? add3.out; - upd5[done] = j0.done ? 1'd1; + a_read0_0.write_en = 1'd1; + a0_0_0_addr2 = k0.out; + a0_0_0_addr1 = i1.out; + a0_0_0_addr0 = batch1.out; + a_read0_0.in = 1'd1 ? a0_0_0_read_data; + upd5[done] = a_read0_0.done ? 1'd1; } group upd6<"static"=1> { - i0.write_en = 1'd1; - add4.left = i0.out; - add4.right = const11.out; - i0.in = 1'd1 ? add4.out; - upd6[done] = i0.done ? 1'd1; + transpose_b_read0_0.write_en = 1'd1; + transpose_b0_0_0.addr2 = j1.out; + transpose_b0_0_0.addr1 = k0.out; + transpose_b0_0_0.addr0 = batch1.out; + transpose_b_read0_0.in = 1'd1 ? transpose_b0_0_0.read_data; + upd6[done] = transpose_b_read0_0.done ? 1'd1; + } + group upd7<"static"=1> { + temporary_x0_0_0.addr2 = j1.out; + temporary_x0_0_0.addr1 = i1.out; + temporary_x0_0_0.addr0 = batch1.out; + temporary_x0_0_0.write_en = 1'd1; + add3.left = temporary_x0_0_0.read_data; + add3.right = product_0.out; + temporary_x0_0_0.addr2 = j1.out; + temporary_x0_0_0.addr1 = i1.out; + temporary_x0_0_0.addr0 = batch1.out; + temporary_x0_0_0.write_data = 1'd1 ? add3.out; + upd7[done] = temporary_x0_0_0.done ? 
1'd1; + } + group upd8<"static"=1> { + k0.write_en = 1'd1; + add4.left = k0.out; + add4.right = const17.out; + k0.in = 1'd1 ? add4.out; + upd8[done] = k0.done ? 1'd1; + } + group upd9<"static"=1> { + j1.write_en = 1'd1; + add5.left = j1.out; + add5.right = const18.out; + j1.in = 1'd1 ? add5.out; + upd9[done] = j1.done ? 1'd1; } } @@ -157,26 +331,66 @@ component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0 let2; while le2.out with cond2 { seq { - let3; - while le3.out with cond3 { + upd0; + upd1; + upd2; + } + } + upd3; + } + } + upd4; + } + } + let3; + while le3.out with cond3 { + seq { + let4; + while le4.out with cond4 { + seq { + let5; + while le5.out with cond5 { + seq { + let6; + while le6.out with cond6 { seq { par { - upd0; - upd1; + upd5; + upd6; } - let4; - let5; - upd2; - upd3; + let7; + let8; + upd7; + upd8; } } - upd4; + upd9; + } + } + upd10; + } + } + upd11; + } + } + let9; + while le7.out with cond7 { + seq { + let10; + while le8.out with cond8 { + seq { + let11; + while le9.out with cond9 { + seq { + upd12; + upd13; + upd14; } } - upd5; + upd15; } } - upd6; + upd16; } } } @@ -185,9 +399,9 @@ component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0 component main () -> () { cells { - x = prim std_mem_d3(32, 3, 3, 3, 2, 2, 2); - a = prim std_mem_d3(32, 3, 3, 3, 2, 2, 2); - b = prim std_mem_d3(32, 3, 3, 3, 2, 2, 2); + x = prim std_mem_d3(32, 4, 7, 7, 3, 3, 3); + a = prim std_mem_d3(32, 4, 7, 5, 3, 3, 3); + b = prim std_mem_d3(32, 4, 7, 5, 3, 3, 3); batch_matmul0 = batch_matmul; } wires { diff --git a/frontends/relay-futil/tests/batch_matmul.relay b/frontends/relay-futil/tests/batch_matmul.relay index 3c33743956..cdb972e9b0 100644 --- a/frontends/relay-futil/tests/batch_matmul.relay +++ b/frontends/relay-futil/tests/batch_matmul.relay @@ -1,6 +1,6 @@ v0.0.4 -fn (%a: Tensor[(3, 3, 3), int32], %b: Tensor[(3, 3, 3), int32]) -> Tensor[(3, 3, 3), int32] { - let %x: Tensor[(3, 3, 3), int32] = 
nn.batch_matmul(%a, %b); +fn (%a: Tensor[(4, 7, 5), int32], %b: Tensor[(4, 7, 5), int32]) { + let %x: Tensor[(4, 7, 7), int32] = nn.batch_matmul(%a, %b); %x } diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect new file mode 100644 index 0000000000..ee66f8942d --- /dev/null +++ b/frontends/relay-futil/tests/bias_add.expect @@ -0,0 +1,131 @@ +import "primitives/std.lib"; + +component tensor2d_bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 7, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 7, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { + cells { + add0 = prim fixed_p_std_add(32, 16, 16); + add1 = prim std_add(7); + add2 = prim std_add(1); + bias_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(7, 0); + const3 = prim std_const(7, 63); + const4 = prim std_const(7, 1); + const5 = prim std_const(1, 1); + i0 = prim std_reg(1); + j0 = prim std_reg(7); + le0 = prim std_le(1); + le1 = prim std_le(7); + x_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = j0.out; + x0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_read_data; + upd0[done] = x_read0_0.done ? 
1'd1; + } + group upd1<"static"=1> { + bias_read0_0.write_en = 1'd1; + bias0_addr0 = j0.out; + bias_read0_0.in = 1'd1 ? bias0_read_data; + upd1[done] = bias_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + x10_0_addr1 = j0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + add0.left = x_read0_0.out; + add0.right = bias_read0_0.out; + x10_0_write_data = 1'd1 ? add0.out; + upd2[done] = x10_0_done ? 1'd1; + } + group upd3<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd3[done] = j0.done ? 1'd1; + } + group upd4<"static"=1> { + i0.write_en = 1'd1; + add2.left = i0.out; + add2.right = const5.out; + i0.in = 1'd1 ? add2.out; + upd4[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 1, 64, 1, 7); + x = prim std_mem_d2(32, 1, 64, 1, 7); + bias = prim std_mem_d1(32, 64, 7); + tensor2d_bias_add0 = tensor2d_bias_add; + } + wires { + group run_tensor2d_bias_add { + x.addr0 = tensor2d_bias_add0.x0_0_addr0; + tensor2d_bias_add0.x0_0_read_data = x.read_data; + x.addr1 = tensor2d_bias_add0.x0_0_addr1; + bias.addr0 = tensor2d_bias_add0.bias0_addr0; + tensor2d_bias_add0.bias0_read_data = bias.read_data; + x1.addr0 = tensor2d_bias_add0.x10_0_addr0; + x1.addr1 = tensor2d_bias_add0.x10_0_addr1; + x1.write_data = tensor2d_bias_add0.x10_0_write_data; + x1.write_en = tensor2d_bias_add0.x10_0_write_en; + tensor2d_bias_add0.x10_0_done = x1.done; + tensor2d_bias_add0.go = 1'd1; + run_tensor2d_bias_add[done] = tensor2d_bias_add0.done ? 
1'd1; + } + } + control { + seq { + run_tensor2d_bias_add; + } + } +} diff --git a/frontends/relay-futil/tests/bias_add.relay b/frontends/relay-futil/tests/bias_add.relay new file mode 100644 index 0000000000..6b90020ed3 --- /dev/null +++ b/frontends/relay-futil/tests/bias_add.relay @@ -0,0 +1,7 @@ +v0.0.4 +fn (%x: Tensor[(1, 64), float32], %bias: Tensor[(64), float32]) { + let %x1: Tensor[(1, 64), float32] = nn.bias_add(%x, %bias); + %x1 +} + + diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect b/frontends/relay-futil/tests/data/batch_flatten.expect similarity index 100% rename from frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect rename to frontends/relay-futil/tests/data/batch_flatten.expect diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay b/frontends/relay-futil/tests/data/batch_flatten.relay similarity index 100% rename from frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay rename to frontends/relay-futil/tests/data/batch_flatten.relay diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data b/frontends/relay-futil/tests/data/batch_flatten.relay.data similarity index 100% rename from frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data rename to frontends/relay-futil/tests/data/batch_flatten.relay.data diff --git a/frontends/relay-futil/tests/data/batch_matmul.expect b/frontends/relay-futil/tests/data/batch_matmul.expect index f5bbcf1366..cfc67febca 100644 --- a/frontends/relay-futil/tests/data/batch_matmul.expect +++ b/frontends/relay-futil/tests/data/batch_matmul.expect @@ -3,52 +3,25 @@ [ [ 1, - 1, - 1 - ], - [ - 1, - 1, - 1 - ], - [ - 1, - 1, - 1 - ] - ], - [ - [ - 1, - 1, - 1 - ], - [ - 1, - 1, - 1 + 2, + 3 ], [ 1, - 1, - 1 + 2, + 3 ] ], [ [ 1, - 1, - 1 - ], - [ - 1, - 1, - 1 + 2, + 3 ], [ 1, - 1, - 1 + 2, + 3 ] ] ], @@ -56,105 +29,99 @@ [ [ 1, - 1, - 1 - ], - [ - 1, - 1, - 1 + 2, + 3 ], [ 1, - 1, - 1 + 2, + 3 ] ], [ [ 1, - 1, - 1 + 2, + 3 ], 
[ 1, - 1, - 1 + 2, + 3 + ] + ] + ], + "temporary_x0_0_0": [ + [ + [ + 14, + 14 ], [ - 1, - 1, - 1 + 14, + 14 ] ], [ [ - 1, - 1, - 1 - ], - [ - 1, - 1, - 1 + 14, + 14 ], [ - 1, - 1, - 1 + 14, + 14 ] ] ], - "x": [ + "transpose_b0_0_0": [ [ [ - 3, - 3, - 3 + 1, + 1 ], [ - 3, - 3, - 3 + 2, + 2 ], [ - 3, 3, 3 ] ], [ [ - 3, - 3, - 3 + 1, + 1 ], [ - 3, - 3, - 3 + 2, + 2 ], [ - 3, 3, 3 ] - ], + ] + ], + "x": [ [ [ - 3, - 3, - 3 + 14, + 14 ], [ - 3, - 3, - 3 + 14, + 14 + ] + ], + [ + [ + 14, + 14 ], [ - 3, - 3, - 3 + 14, + 14 ] ] ] diff --git a/frontends/relay-futil/tests/data/batch_matmul.relay b/frontends/relay-futil/tests/data/batch_matmul.relay index 20f860a2b7..0ab8e09c9f 100644 --- a/frontends/relay-futil/tests/data/batch_matmul.relay +++ b/frontends/relay-futil/tests/data/batch_matmul.relay @@ -1,5 +1,5 @@ v0.0.4 -fn (%a: Tensor[(3, 3, 3), int32], %b: Tensor[(3, 3, 3), int32]) -> Tensor[(3, 3, 3), int32] { - let %x: Tensor[(3, 3, 3), int32] = nn.batch_matmul(%a, %b); +fn (%a: Tensor[(2, 2, 3), int32], %b: Tensor[(2, 2, 3), int32]) { + let %x = nn.batch_matmul(%a, %b); %x } \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/batch_matmul.relay.data b/frontends/relay-futil/tests/data/batch_matmul.relay.data index 172b2d3ac2..6e7ef55c45 100644 --- a/frontends/relay-futil/tests/data/batch_matmul.relay.data +++ b/frontends/relay-futil/tests/data/batch_matmul.relay.data @@ -1,14 +1,22 @@ { "a": { - "data": [[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], + "data": [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3]]], "bitwidth": 32 }, "b": { - "data": [[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], + "data": [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3]]], "bitwidth": 32 }, "x": { - "data": [[[0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0]]], + "data": [[[0,0], [0,0]], [[0,0], [0,0]]], + "bitwidth": 32 
+ }, + "temporary_x0_0_0": { + "data": [[[0,0], [0,0]], [[0,0], [0,0]]], + "bitwidth": 32 + }, + "transpose_b0_0_0": { + "data": [[[0,0], [0,0], [0,0]], [[0,0], [0,0], [0,0]]], "bitwidth": 32 } } \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/bias_add.expect b/frontends/relay-futil/tests/data/bias_add.expect new file mode 100644 index 0000000000..4f6da23238 --- /dev/null +++ b/frontends/relay-futil/tests/data/bias_add.expect @@ -0,0 +1,36 @@ +{ + "bias": [ + 1, + 1, + 1, + 1 + ], + "x": [ + [ + 41, + 41, + 41, + 41 + ], + [ + 41, + 41, + 41, + 41 + ] + ], + "x1": [ + [ + 42, + 42, + 42, + 42 + ], + [ + 42, + 42, + 42, + 42 + ] + ] +} diff --git a/frontends/relay-futil/tests/data/bias_add.relay b/frontends/relay-futil/tests/data/bias_add.relay new file mode 100644 index 0000000000..4a1c58a64a --- /dev/null +++ b/frontends/relay-futil/tests/data/bias_add.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(2, 4), float32], %bias: Tensor[(4), float32]) { + let %x1: Tensor[(2, 4), float32] = nn.bias_add(%x, %bias); + %x1 +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/bias_add.relay.data b/frontends/relay-futil/tests/data/bias_add.relay.data new file mode 100644 index 0000000000..f1ef184ad6 --- /dev/null +++ b/frontends/relay-futil/tests/data/bias_add.relay.data @@ -0,0 +1,14 @@ +{ + "x": { + "data": [[41,41,41,41], [41,41,41,41]], + "bitwidth": 32 + }, + "bias": { + "data": [1,1,1,1], + "bitwidth": 32 + }, + "x1": { + "data": [[0,0,0,0], [0,0,0,0]], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/fixed_point_add.expect b/frontends/relay-futil/tests/fixed_point_add.expect new file mode 100644 index 0000000000..3a08f950e8 --- /dev/null +++ b/frontends/relay-futil/tests/fixed_point_add.expect @@ -0,0 +1,98 @@ +import "primitives/std.lib"; + +component tensor1d_add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 
1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { + cells { + add0 = prim fixed_p_std_add(32, 16, 16); + add1 = prim std_add(1); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 1); + i0 = prim std_reg(1); + le0 = prim std_le(1); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_read_data; + upd1[done] = y_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + z0_addr0 = i0.out; + z0_write_en = 1'd1; + add0.left = x_read0_0.out; + add0.right = y_read0_0.out; + z0_write_data = 1'd1 ? add0.out; + upd2[done] = z0_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const2.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + } + } +} + +component main () -> () { + cells { + z = prim std_mem_d1(32, 1, 1); + x = prim std_mem_d1(32, 1, 1); + y = prim std_mem_d1(32, 1, 1); + tensor1d_add0 = tensor1d_add; + } + wires { + group run_tensor1d_add { + x.addr0 = tensor1d_add0.x0_addr0; + tensor1d_add0.x0_read_data = x.read_data; + y.addr0 = tensor1d_add0.y0_addr0; + tensor1d_add0.y0_read_data = y.read_data; + z.addr0 = tensor1d_add0.z0_addr0; + z.write_data = tensor1d_add0.z0_write_data; + z.write_en = tensor1d_add0.z0_write_en; + tensor1d_add0.z0_done = z.done; + tensor1d_add0.go = 1'd1; + run_tensor1d_add[done] = tensor1d_add0.done ? 1'd1; + } + } + control { + seq { + run_tensor1d_add; + } + } +} diff --git a/frontends/relay-futil/tests/fixed_point_add.relay b/frontends/relay-futil/tests/fixed_point_add.relay new file mode 100644 index 0000000000..4de39a28fb --- /dev/null +++ b/frontends/relay-futil/tests/fixed_point_add.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: float32, %y: float32) { + let %z = add(%x, %y); + %z +} diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index 8885ca1785..3ef8c350f2 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -76,15 +76,16 @@ component tensor1d_multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read } } } -component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { +component tensor1d_divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, 
c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); - add0 = prim std_add(32); - add1 = prim std_add(1); + add0 = prim std_add(1); + bin_read0_0 = prim std_reg(32); c_read0_0 = prim std_reg(32); const0 = prim std_const(1, 0); const1 = prim std_const(1, 0); const2 = prim std_const(1, 1); + div_pipe0 = prim std_div_pipe(32); i0 = prim std_reg(1); le0 = prim std_le(1); } @@ -99,6 +100,14 @@ component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data i0.write_en = 1'd1; let0[done] = i0.done; } + group let1<> { + bin_read0_0.in = div_pipe0.out; + bin_read0_0.write_en = div_pipe0.done; + let1[done] = bin_read0_0.done; + div_pipe0.left = c_read0_0.out; + div_pipe0.right = a_read0_0.out; + div_pipe0.go = !div_pipe0.done ? 1'd1; + } group upd0<"static"=1> { c_read0_0.write_en = 1'd1; c0_addr0 = i0.out; @@ -114,16 +123,14 @@ component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data group upd2<"static"=1> { d0_addr0 = i0.out; d0_write_en = 1'd1; - add0.left = c_read0_0.out; - add0.right = a_read0_0.out; - d0_write_data = 1'd1 ? add0.out; + d0_write_data = 1'd1 ? bin_read0_0.out; upd2[done] = d0_done ? 1'd1; } group upd3<"static"=1> { i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const2.out; - i0.in = 1'd1 ? add1.out; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; upd3[done] = i0.done ? 
1'd1; } } @@ -137,6 +144,7 @@ component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data upd0; upd1; } + let1; upd2; upd3; } @@ -220,7 +228,7 @@ component main () -> () { d = prim std_mem_d1(32, 1, 1); tensor1d_multiply0 = tensor1d_multiply; a = prim std_mem_d1(32, 1, 1); - tensor1d_add0 = tensor1d_add; + tensor1d_divide0 = tensor1d_divide; b = prim std_mem_d1(32, 1, 1); tensor1d_subtract0 = tensor1d_subtract; } @@ -237,17 +245,17 @@ component main () -> () { tensor1d_subtract0.go = 1'd1; run_tensor1d_subtract[done] = tensor1d_subtract0.done ? 1'd1; } - group run_tensor1d_add { - c.addr0 = tensor1d_add0.c0_addr0; - tensor1d_add0.c0_read_data = c.read_data; - a.addr0 = tensor1d_add0.a0_addr0; - tensor1d_add0.a0_read_data = a.read_data; - d.addr0 = tensor1d_add0.d0_addr0; - d.write_data = tensor1d_add0.d0_write_data; - d.write_en = tensor1d_add0.d0_write_en; - tensor1d_add0.d0_done = d.done; - tensor1d_add0.go = 1'd1; - run_tensor1d_add[done] = tensor1d_add0.done ? 1'd1; + group run_tensor1d_divide { + c.addr0 = tensor1d_divide0.c0_addr0; + tensor1d_divide0.c0_read_data = c.read_data; + a.addr0 = tensor1d_divide0.a0_addr0; + tensor1d_divide0.a0_read_data = a.read_data; + d.addr0 = tensor1d_divide0.d0_addr0; + d.write_data = tensor1d_divide0.d0_write_data; + d.write_en = tensor1d_divide0.d0_write_en; + tensor1d_divide0.d0_done = d.done; + tensor1d_divide0.go = 1'd1; + run_tensor1d_divide[done] = tensor1d_divide0.done ? 
1'd1; } group run_tensor1d_multiply { c.addr0 = tensor1d_multiply0.c0_addr0; @@ -265,7 +273,7 @@ component main () -> () { control { seq { run_tensor1d_subtract; - run_tensor1d_add; + run_tensor1d_divide; run_tensor1d_multiply; } } diff --git a/frontends/relay-futil/tests/let3.relay b/frontends/relay-futil/tests/let3.relay index 29d96dd3e9..50aa9a8064 100644 --- a/frontends/relay-futil/tests/let3.relay +++ b/frontends/relay-futil/tests/let3.relay @@ -1,7 +1,7 @@ v0.0.4 fn (%a: int32, %b: int32) { let %c = subtract(%a, %b); - let %d = add(%c, %a); + let %d = divide(%c, %a); let %e = multiply(%c, %d); %e } diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect new file mode 100644 index 0000000000..098c7bcc1d --- /dev/null +++ b/frontends/relay-futil/tests/relu.expect @@ -0,0 +1,152 @@ +import "primitives/std.lib"; + +component tensor2d_relu(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { + cells { + add0 = prim std_add(3); + add1 = prim std_add(2); + const0 = prim std_const(32, 0); + const1 = prim std_const(2, 0); + const2 = prim std_const(2, 1); + const3 = prim std_const(3, 0); + const4 = prim std_const(3, 3); + const5 = prim std_const(32, 0); + const6 = prim std_const(3, 1); + const7 = prim std_const(2, 1); + gt0 = prim std_gt(32); + i0 = prim std_reg(2); + j0 = prim std_reg(3); + le0 = prim std_le(2); + le1 = prim std_le(3); + x_read0_0 = prim std_reg(32); + x_read1_0 = prim std_reg(32); + zero_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const2.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const4.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + gt0.left = x_read0_0.out; + 
gt0.right = zero_0.out; + } + group let0<"static"=1> { + zero_0.in = const0.out; + zero_0.write_en = 1'd1; + let0[done] = zero_0.done; + } + group let1<"static"=1> { + i0.in = const1.out; + i0.write_en = 1'd1; + let1[done] = i0.done; + } + group let2<"static"=1> { + j0.in = const3.out; + j0.write_en = 1'd1; + let2[done] = j0.done; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = j0.out; + x0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + x_read1_0.write_en = 1'd1; + x0_0_addr1 = j0.out; + x0_0_addr0 = i0.out; + x_read1_0.in = 1'd1 ? x0_0_read_data; + upd1[done] = x_read1_0.done ? 1'd1; + } + group upd2<"static"=1> { + x10_0_addr1 = j0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? x_read1_0.out; + upd2[done] = x10_0_done ? 1'd1; + } + group upd3<"static"=1> { + x10_0_addr1 = j0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? const5.out; + upd3[done] = x10_0_done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const6.out; + j0.in = 1'd1 ? add0.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const7.out; + i0.in = 1'd1 ? add1.out; + upd5[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + let1; + while le0.out with cond0 { + seq { + let2; + while le1.out with cond1 { + seq { + upd0; + if gt0.out with cond2 { + seq { + upd1; + upd2; + } + } else { + upd3; + } + upd4; + } + } + upd5; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 2, 4, 2, 3); + x = prim std_mem_d2(32, 2, 4, 2, 3); + tensor2d_relu0 = tensor2d_relu; + } + wires { + group run_tensor2d_relu { + x.addr0 = tensor2d_relu0.x0_0_addr0; + tensor2d_relu0.x0_0_read_data = x.read_data; + x.addr1 = tensor2d_relu0.x0_0_addr1; + x1.addr0 = tensor2d_relu0.x10_0_addr0; + x1.addr1 = tensor2d_relu0.x10_0_addr1; + x1.write_data = tensor2d_relu0.x10_0_write_data; + x1.write_en = tensor2d_relu0.x10_0_write_en; + tensor2d_relu0.x10_0_done = x1.done; + tensor2d_relu0.go = 1'd1; + run_tensor2d_relu[done] = tensor2d_relu0.done ? 1'd1; + } + } + control { + seq { + run_tensor2d_relu; + } + } +} diff --git a/frontends/relay-futil/tests/relu.relay b/frontends/relay-futil/tests/relu.relay new file mode 100644 index 0000000000..f8f324a033 --- /dev/null +++ b/frontends/relay-futil/tests/relu.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(2, 4), int32]) { + let %x1: Tensor[(2, 4), int32] = nn.relu(%x); + %x1 +} + diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 89adfec513..8deeb217c1 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -17,25 +17,45 @@ def flatten(l): return new_list -def get_bitwidth(type): +def get_dahlia_data_type(relay_type): ''' - Quick and dirty way to get the bitwidth. + Gets the Dahlia data type from the given Relay type. + NOTE: Currently, Dahlia does not support signed types for arrays. ''' - t = str(type) - assert t[0:3] == 'int' or t[0:5] == 'float', f'{t} is not supported.' 
- begin = 3 if t[0:3] == 'int' else 5 # 'float' - return int(t[begin:len(t)]) + if 'int' in relay_type: return 'ubit' + if 'float' in relay_type: return 'ufix' + assert False, f'{relay_type} is not supported.' + + +def get_bitwidth(relay_type): + ''' + Gets the bitwidth from a Relay type. + If the relay_type is floating point of size N, returns a fixed point of size <N, N/2>. + This lowers to a fixed point cell with `int_width` of size N/2, and a `fract_width` of size N/2. + ''' + type = str(relay_type) + length = len(type) + if 'int' in type: return type[3:length] + if 'float' in type: + width = int(type[5:length]) + return f'{width}, {int(width / 2)}' + assert False, f'{relay_type} is not supported.' def get_memory_parameters(type): ''' Acquires the memory parameters necessary to create a FuTIL memory primitive. + + A Tensor type in Relay is presented as: `Tensor[(dim1, dim2, ...), type]`. + For example, `Tensor[(2, 4), int32]` is a 2-dimensional tensor with data type int32. + + We then parse this to determine the corresponding FuTIL and Dahlia types. ''' t = str(type) + data_type = get_dahlia_data_type(t) if t[0:3] == 'int' or t[0:5] == 'float': - return [get_bitwidth(type), 1, 1], PrimitiveType.Memory1D + return [get_bitwidth(type), 1, 1], PrimitiveType.Memory1D, data_type assert t[0:6] == 'Tensor', f'{type} is not currently supported.' - string_type = t[t.find(")") + 3:t.find("]")] string_dimensions = t[t.find("(") + 1:t.find(")")] @@ -44,14 +64,19 @@ def get_memory_parameters(type): for dimension in tensor_dimensions: data.append(dimension) # Size. for dimension in tensor_dimensions: data.append(int(math.log2(dimension) + 1)) # Index size.
- if len(tensor_dimensions) == 2: - type = PrimitiveType.Memory2D - elif len(tensor_dimensions) == 3: - type = PrimitiveType.Memory3D - return data, type + if len(tensor_dimensions) == 1: primitive_type = PrimitiveType.Memory1D + if len(tensor_dimensions) == 2: primitive_type = PrimitiveType.Memory2D + if len(tensor_dimensions) == 3: primitive_type = PrimitiveType.Memory3D + return data, primitive_type, data_type -def build_main(c: FComponent): +def build_main_controls(c: FComponent): + ''' + Builds the wires and control for the `main` component. + This is done by creating a group run_* with its respective + wiring for each Dahlia declaration, and adding it to the + control. + ''' dahlia_declarations = [] for cell in reversed(c.cells): if not cell.is_dahlia_declaration(): continue @@ -66,9 +91,9 @@ def build_main(c: FComponent): wires.append(FWire(f'{prim.name}.addr0', f'{declaration.decl_name}.{input.dahlia_name}_addr0')) wires.append( FWire(f'{declaration.decl_name}.{input.dahlia_name}_read_data', f'{prim.name}.read_data')) - if not prim.type == PrimitiveType.Memory2D and not prim.type == PrimitiveType.Memory3D: continue + if prim.type == PrimitiveType.Memory1D: continue wires.append(FWire(f'{prim.name}.addr1', f'{declaration.decl_name}.{input.dahlia_name}_addr1')) - if not prim.type == PrimitiveType.Memory3D: continue + if prim.type == PrimitiveType.Memory2D: continue wires.append(FWire(f'{prim.name}.addr2', f'{declaration.decl_name}.{input.dahlia_name}_addr2')) output = declaration.output @@ -87,6 +112,6 @@ def build_main(c: FComponent): wires.append(FWire(f'{group_name}[done]', f"{declaration.decl_name}.done ? 1'd1")) c.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) - # Ensures that only group names make it into the Controls of a component. + # Ensures that only group names make it into the controls of a component. 
connections = list(filter(lambda w: w.is_group(), c.wires)) c.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] From 8980ed9b134ccd66e9299be0318190b5e267c7fd Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 12 Nov 2020 10:36:13 -0500 Subject: [PATCH 13/75] [relay] Clarify comment. --- frontends/relay-futil/dahlia_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index fd0b3db770..e2c5729d16 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -172,8 +172,8 @@ def batch_matmul(declaration): assert op1.data_type == op2.data_type and op2.data_type == res.data_type # 1. Get transpose of second operand. - # 2. Conduct temporary = op1 * transpose(op2). - # 3. Write temporary value to return value.* + # 2. Create temporary value `t`. Then, t = op1 * transpose(op2). + # 3. Copy temporary value to return value.* # * This third step may not be necessary, but trying to conduct the matrix multiply # directly with the return value declared resulted in incorrect outputs. program = f""" From 9b08d295a5612535d8c6f52f46fab6ee9fb2cd22 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 14 Nov 2020 20:55:40 -0500 Subject: [PATCH 14/75] Rename to batch_flatten. 
--- frontends/relay-futil/compiler.py | 2 +- frontends/relay-futil/dahlia_functions.py | 2 +- ...tch_flatten.expect => batch_flatten.expect} | 0 ...batch_flatten.relay => batch_flatten.relay} | 0 .../tests/data/tensor3d_batch_flatten.expect | 18 ------------------ .../data/tensor3d_batch_flatten.relay.data | 10 ---------- .../tests/tensor3d_batch_flatten.relay | 6 ------ 7 files changed, 2 insertions(+), 36 deletions(-) rename frontends/relay-futil/tests/{tensor3d_batch_flatten.expect => batch_flatten.expect} (100%) rename frontends/relay-futil/tests/{data/tensor3d_batch_flatten.relay => batch_flatten.relay} (100%) delete mode 100644 frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect delete mode 100644 frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data delete mode 100644 frontends/relay-futil/tests/tensor3d_batch_flatten.relay diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 2b66ac0f7e..93da8c9f5e 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -74,7 +74,7 @@ def get_dahlia_declaration(self, function_name, cells, args): function, name = tensor2d_op, f'tensor2d_{function_name}' if function_name == "nn.batch_flatten": - if input_type == PrimitiveType.Memory3D: function = tensor3d_batch_flatten + if input_type == PrimitiveType.Memory3D: function = batch_flatten elif function_name == "nn.batch_matmul": function = batch_matmul elif function_name == "nn.bias_add": diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index e2c5729d16..2c72f19b87 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -83,7 +83,7 @@ def tensor2d_op(declaration): return lower_dahlia_program(program, declaration.component_name) -def tensor3d_batch_flatten(declaration): +def batch_flatten(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" 
op1, res = declaration.inputs[0].primitive, declaration.output.primitive bitwidth, op1_size0, op1_size1, op1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] diff --git a/frontends/relay-futil/tests/tensor3d_batch_flatten.expect b/frontends/relay-futil/tests/batch_flatten.expect similarity index 100% rename from frontends/relay-futil/tests/tensor3d_batch_flatten.expect rename to frontends/relay-futil/tests/batch_flatten.expect diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay b/frontends/relay-futil/tests/batch_flatten.relay similarity index 100% rename from frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay rename to frontends/relay-futil/tests/batch_flatten.relay diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect deleted file mode 100644 index 4d55d4d415..0000000000 --- a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.expect +++ /dev/null @@ -1,18 +0,0 @@ -{ - "x": [ - [ - 1, - 2 - ], - [ - 3, - 4 - ] - ], - "x1": [ - 1, - 2, - 3, - 4 - ] -} diff --git a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data b/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data deleted file mode 100644 index b6c5eae239..0000000000 --- a/frontends/relay-futil/tests/data/tensor3d_batch_flatten.relay.data +++ /dev/null @@ -1,10 +0,0 @@ -{ - "x": { - "data": [[1, 2], [3, 4]], - "bitwidth": 32 - }, - "x1": { - "data": [1, 2, 3, 4], - "bitwidth": 32 - } -} \ No newline at end of file diff --git a/frontends/relay-futil/tests/tensor3d_batch_flatten.relay b/frontends/relay-futil/tests/tensor3d_batch_flatten.relay deleted file mode 100644 index 2a5e223fec..0000000000 --- a/frontends/relay-futil/tests/tensor3d_batch_flatten.relay +++ /dev/null @@ -1,6 +0,0 @@ -v0.0.4 -fn (%x: Tensor[(1, 2, 2), int32]) -> Tensor[(1, 4), int32] { - let %x1: Tensor[(1, 4), int32] = nn.batch_flatten(%x); - %x1 -} - From 
806e93d2ca8a1ff657ec9965db56850d718217bc Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 14 Nov 2020 20:58:59 -0500 Subject: [PATCH 15/75] Fix batch_flatten, add test for 1d tensor binary ops --- .../relay-futil/tests/batch_flatten.expect | 30 ++--- .../relay-futil/tests/tensor1d_mult.expect | 106 ++++++++++++++++++ .../relay-futil/tests/tensor1d_mult.relay | 6 + 3 files changed, 127 insertions(+), 15 deletions(-) create mode 100644 frontends/relay-futil/tests/tensor1d_mult.expect create mode 100644 frontends/relay-futil/tests/tensor1d_mult.relay diff --git a/frontends/relay-futil/tests/batch_flatten.expect b/frontends/relay-futil/tests/batch_flatten.expect index b04fd2d7b3..6927e4ad85 100644 --- a/frontends/relay-futil/tests/batch_flatten.expect +++ b/frontends/relay-futil/tests/batch_flatten.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor3d_batch_flatten(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component batch_flatten(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { cells { add0 = prim std_add(3); add1 = prim std_add(2); @@ -139,26 +139,26 @@ component main () -> () { cells { x1 = prim std_mem_d2(32, 1, 4, 1, 3); x = prim std_mem_d3(32, 1, 2, 2, 1, 2, 2); - tensor3d_batch_flatten0 = tensor3d_batch_flatten; + batch_flatten0 = batch_flatten; } wires { - group run_tensor3d_batch_flatten { - x.addr0 = tensor3d_batch_flatten0.x0_0_0_addr0; - tensor3d_batch_flatten0.x0_0_0_read_data = x.read_data; - x.addr1 = 
tensor3d_batch_flatten0.x0_0_0_addr1; - x.addr2 = tensor3d_batch_flatten0.x0_0_0_addr2; - x1.addr0 = tensor3d_batch_flatten0.x10_0_addr0; - x1.addr1 = tensor3d_batch_flatten0.x10_0_addr1; - x1.write_data = tensor3d_batch_flatten0.x10_0_write_data; - x1.write_en = tensor3d_batch_flatten0.x10_0_write_en; - tensor3d_batch_flatten0.x10_0_done = x1.done; - tensor3d_batch_flatten0.go = 1'd1; - run_tensor3d_batch_flatten[done] = tensor3d_batch_flatten0.done ? 1'd1; + group run_batch_flatten { + x.addr0 = batch_flatten0.x0_0_0_addr0; + batch_flatten0.x0_0_0_read_data = x.read_data; + x.addr1 = batch_flatten0.x0_0_0_addr1; + x.addr2 = batch_flatten0.x0_0_0_addr2; + x1.addr0 = batch_flatten0.x10_0_addr0; + x1.addr1 = batch_flatten0.x10_0_addr1; + x1.write_data = batch_flatten0.x10_0_write_data; + x1.write_en = batch_flatten0.x10_0_write_en; + batch_flatten0.x10_0_done = x1.done; + batch_flatten0.go = 1'd1; + run_batch_flatten[done] = batch_flatten0.done ? 1'd1; } } control { seq { - run_tensor3d_batch_flatten; + run_batch_flatten; } } } diff --git a/frontends/relay-futil/tests/tensor1d_mult.expect b/frontends/relay-futil/tests/tensor1d_mult.expect new file mode 100644 index 0000000000..d6086cd33d --- /dev/null +++ b/frontends/relay-futil/tests/tensor1d_mult.expect @@ -0,0 +1,106 @@ +import "primitives/std.lib"; + +component tensor1d_multiply(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { + cells { + add0 = prim std_add(3); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(3, 0); + const1 = prim std_const(3, 3); + const2 = prim std_const(3, 1); + i0 = prim std_reg(3); + le0 = prim std_le(3); + mult_pipe0 = prim std_mult_pipe(32); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); + } + wires { + group 
cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let1[done] = bin_read0_0.done; + mult_pipe0.left = x_read0_0.out; + mult_pipe0.right = y_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_read_data; + upd1[done] = y_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + x10_addr0 = i0.out; + x10_write_en = 1'd1; + x10_write_data = 1'd1 ? bin_read0_0.out; + upd2[done] = x10_done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add0.left = i0.out; + add0.right = const2.out; + i0.in = 1'd1 ? add0.out; + upd3[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + par { + upd0; + upd1; + } + let1; + upd2; + upd3; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d1(32, 4, 3); + x = prim std_mem_d1(32, 4, 3); + y = prim std_mem_d1(32, 4, 3); + tensor1d_multiply0 = tensor1d_multiply; + } + wires { + group run_tensor1d_multiply { + x.addr0 = tensor1d_multiply0.x0_addr0; + tensor1d_multiply0.x0_read_data = x.read_data; + y.addr0 = tensor1d_multiply0.y0_addr0; + tensor1d_multiply0.y0_read_data = y.read_data; + x1.addr0 = tensor1d_multiply0.x10_addr0; + x1.write_data = tensor1d_multiply0.x10_write_data; + x1.write_en = tensor1d_multiply0.x10_write_en; + tensor1d_multiply0.x10_done = x1.done; + tensor1d_multiply0.go = 1'd1; + run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 
1'd1; + } + } + control { + seq { + run_tensor1d_multiply; + } + } +} diff --git a/frontends/relay-futil/tests/tensor1d_mult.relay b/frontends/relay-futil/tests/tensor1d_mult.relay new file mode 100644 index 0000000000..5c87584cf4 --- /dev/null +++ b/frontends/relay-futil/tests/tensor1d_mult.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(4), int32], %y: Tensor[(4), int32]) { + let %x1 = multiply(%x, %y); + %x1 +} + From 425a87c80d01b43ad9dbb85b33cf7aaf67d41be6 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 14 Nov 2020 21:08:16 -0500 Subject: [PATCH 16/75] [relay] add binary ops for 3d tensors. --- frontends/relay-futil/compiler.py | 2 + frontends/relay-futil/dahlia_functions.py | 23 +++ .../relay-futil/tests/tensor3d_divide.expect | 176 ++++++++++++++++++ .../relay-futil/tests/tensor3d_divide.relay | 6 + 4 files changed, 207 insertions(+) create mode 100644 frontends/relay-futil/tests/tensor3d_divide.expect create mode 100644 frontends/relay-futil/tests/tensor3d_divide.relay diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 93da8c9f5e..3ff627b37b 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -72,6 +72,8 @@ def get_dahlia_declaration(self, function_name, cells, args): function, name = tensor1d_op, f'tensor1d_{function_name}' elif input_type == PrimitiveType.Memory2D: function, name = tensor2d_op, f'tensor2d_{function_name}' + elif input_type == PrimitiveType.Memory3D: + function, name = tensor3d_op, f'tensor3d_{function_name}' if function_name == "nn.batch_flatten": if input_type == PrimitiveType.Memory3D: function = batch_flatten diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 2c72f19b87..63a4c09e42 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -83,6 +83,29 @@ def tensor2d_op(declaration): return lower_dahlia_program(program, declaration.component_name) +def 
tensor3d_op(declaration): + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + bitwidth, size0, size1, size2, = op1.data[0], op1.data[1], op1.data[2], op1.data[3] + index_size0, index_size1, index_size2 = op1.data[4], op1.data[5], op1.data[6] + assert op1.type == PrimitiveType.Memory3D and op1.type == op2.type and op2.type == res.type + assert bitwidth == op2.data[0] and op1.data[0] == res.data[0] and op2.data[4] == res.data[4] + assert size0 == op2.data[1] and op2.data[1] == res.data[1] and size1 == op2.data[2] and op2.data[2] == res.data[2] + assert index_size0 == op2.data[4] and op2.data[4] == res.data[4] and index_size1 == op2.data[5] + assert index_size2 == op2.data[6] and op2.data[6] == res.data[6] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let k: ubit<{index_size2}> = 0..{size2}) {{ + {res.name}[i][j][k] := {op1.name}[i][j][k] {declaration.op} {op2.name}[i][j][k]; + }} + }} + }}""" + return lower_dahlia_program(program, declaration.component_name) + + def batch_flatten(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" op1, res = declaration.inputs[0].primitive, declaration.output.primitive diff --git a/frontends/relay-futil/tests/tensor3d_divide.expect b/frontends/relay-futil/tests/tensor3d_divide.expect new file mode 100644 index 0000000000..10eb243cc1 --- /dev/null +++ b/frontends/relay-futil/tests/tensor3d_divide.expect @@ -0,0 +1,176 @@ +import "primitives/std.lib"; + +component tensor3d_divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, 
x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { + cells { + add0 = prim std_add(3); + add1 = prim std_add(3); + add2 = prim std_add(3); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(3, 0); + const1 = prim std_const(3, 3); + const2 = prim std_const(3, 0); + const3 = prim std_const(3, 4); + const4 = prim std_const(3, 0); + const5 = prim std_const(3, 5); + const6 = prim std_const(3, 1); + const7 = prim std_const(3, 1); + const8 = prim std_const(3, 1); + div_pipe0 = prim std_div_pipe(32); + i0 = prim std_reg(3); + j0 = prim std_reg(3); + k0 = prim std_reg(3); + le0 = prim std_le(3); + le1 = prim std_le(3); + le2 = prim std_le(3); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const5.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + k0.in = const4.out; + k0.write_en = 1'd1; + let2[done] = k0.done; + } + group let3<> { + bin_read0_0.in = div_pipe0.out; + bin_read0_0.write_en = div_pipe0.done; + let3[done] = bin_read0_0.done; + div_pipe0.left = x_read0_0.out; + div_pipe0.right = y_read0_0.out; + div_pipe0.go = !div_pipe0.done ? 
1'd1; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_0_addr2 = k0.out; + x0_0_0_addr1 = j0.out; + x0_0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_0_0_addr2 = k0.out; + y0_0_0_addr1 = j0.out; + y0_0_0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_0_0_read_data; + upd1[done] = y_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + x10_0_0_addr2 = k0.out; + x10_0_0_addr1 = j0.out; + x10_0_0_addr0 = i0.out; + x10_0_0_write_en = 1'd1; + x10_0_0_write_data = 1'd1 ? bin_read0_0.out; + upd2[done] = x10_0_0_done ? 1'd1; + } + group upd3<"static"=1> { + k0.write_en = 1'd1; + add0.left = k0.out; + add0.right = const6.out; + k0.in = 1'd1 ? add0.out; + upd3[done] = k0.done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const7.out; + j0.in = 1'd1 ? add1.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add2.left = i0.out; + add2.right = const8.out; + i0.in = 1'd1 ? add2.out; + upd5[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + par { + upd0; + upd1; + } + let3; + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); + x = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); + y = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); + tensor3d_divide0 = tensor3d_divide; + } + wires { + group run_tensor3d_divide { + x.addr0 = tensor3d_divide0.x0_0_0_addr0; + tensor3d_divide0.x0_0_0_read_data = x.read_data; + x.addr1 = tensor3d_divide0.x0_0_0_addr1; + x.addr2 = tensor3d_divide0.x0_0_0_addr2; + y.addr0 = tensor3d_divide0.y0_0_0_addr0; + tensor3d_divide0.y0_0_0_read_data = y.read_data; + y.addr1 = tensor3d_divide0.y0_0_0_addr1; + y.addr2 = tensor3d_divide0.y0_0_0_addr2; + x1.addr0 = tensor3d_divide0.x10_0_0_addr0; + x1.addr1 = tensor3d_divide0.x10_0_0_addr1; + x1.addr2 = tensor3d_divide0.x10_0_0_addr2; + x1.write_data = tensor3d_divide0.x10_0_0_write_data; + x1.write_en = tensor3d_divide0.x10_0_0_write_en; + tensor3d_divide0.x10_0_0_done = x1.done; + tensor3d_divide0.go = 1'd1; + run_tensor3d_divide[done] = tensor3d_divide0.done ? 1'd1; + } + } + control { + seq { + run_tensor3d_divide; + } + } +} diff --git a/frontends/relay-futil/tests/tensor3d_divide.relay b/frontends/relay-futil/tests/tensor3d_divide.relay new file mode 100644 index 0000000000..3a9c5a995d --- /dev/null +++ b/frontends/relay-futil/tests/tensor3d_divide.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(4,5,6), int32], %y: Tensor[(4,5,6), int32]) { + let %x1 = divide(%x, %y); + %x1 +} + From a24a4dbe48b2b2e8eea9651705017281ba3443b8 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 14 Nov 2020 21:13:52 -0500 Subject: [PATCH 17/75] Add additional assert statement. 
--- frontends/relay-futil/dahlia_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 63a4c09e42..2782620cf4 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -90,6 +90,7 @@ def tensor3d_op(declaration): assert op1.type == PrimitiveType.Memory3D and op1.type == op2.type and op2.type == res.type assert bitwidth == op2.data[0] and op1.data[0] == res.data[0] and op2.data[4] == res.data[4] assert size0 == op2.data[1] and op2.data[1] == res.data[1] and size1 == op2.data[2] and op2.data[2] == res.data[2] + assert size2 == op2.data[3] and op2.data[3] == res.data[3] assert index_size0 == op2.data[4] and op2.data[4] == res.data[4] and index_size1 == op2.data[5] assert index_size2 == op2.data[6] and op2.data[6] == res.data[6] program = f""" From 46dc53648bc6e569db9831316938d38bdd17f79b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 15:10:36 -0500 Subject: [PATCH 18/75] Add 4d tensor ops. 
--- frontends/relay-futil/compiler.py | 6 +- frontends/relay-futil/dahlia_functions.py | 116 +++++++++------ frontends/relay-futil/futil_ast.py | 1 + frontends/relay-futil/pretty_print.py | 11 ++ frontends/relay-futil/tests/relu.expect | 174 +++++++++++++++------- frontends/relay-futil/tests/relu.relay | 4 +- frontends/relay-futil/utilities.py | 1 + 7 files changed, 210 insertions(+), 103 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 3ff627b37b..dc5c47df93 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -56,6 +56,7 @@ def produce_dahlia_name(self, name, type): if type == PrimitiveType.Memory1D: return dahlia_name if type == PrimitiveType.Memory2D: return dahlia_name + "_0" if type == PrimitiveType.Memory3D: return dahlia_name + "_0_0" + if type == PrimitiveType.Memory4D: return dahlia_name + "_0_0_0" assert False, f'{name} with {type} is not supported yet.' def get_dahlia_declaration(self, function_name, cells, args): @@ -74,6 +75,8 @@ def get_dahlia_declaration(self, function_name, cells, args): function, name = tensor2d_op, f'tensor2d_{function_name}' elif input_type == PrimitiveType.Memory3D: function, name = tensor3d_op, f'tensor3d_{function_name}' + elif input_type == PrimitiveType.Memory4D: + function, name = tensor4d_op, f'tensor4d_{function_name}' if function_name == "nn.batch_flatten": if input_type == PrimitiveType.Memory3D: function = batch_flatten @@ -82,7 +85,8 @@ def get_dahlia_declaration(self, function_name, cells, args): elif function_name == "nn.bias_add": if input_type == PrimitiveType.Memory2D: function = tensor2d_bias_add elif function_name == "nn.relu": - if input_type == PrimitiveType.Memory2D: function = tensor2d_relu + assert input_type == PrimitiveType.Memory2D or input_type == PrimitiveType.Memory4D + function = relu assert function != None, f'{function_name} with type {input_type} is not supported.' 
if name == None: name = function.__name__ diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 2782620cf4..1f4bf8b80d 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -48,11 +48,6 @@ def lower_dahlia_program(prog, component_name): def tensor1d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - - assert op1.data_type == op2.data_type and op2.data_type == res.data_type - assert op1.type == PrimitiveType.Memory1D and op1.type == op2.type and op2.type == res.type - assert op1.data[0] == op2.data[0] and op1.data[0] == res.data[0] and op1.data[1] == op2.data[1] - assert op1.data[2] == op2.data[2] and op2.data[2] == res.data[2] and op2.data[1] == res.data[1] bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] program = f""" decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; @@ -67,10 +62,6 @@ def tensor1d_op(declaration): def tensor2d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth, size0, size1, index_size0, index_size1 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] - assert op1.type == PrimitiveType.Memory2D and op1.type == op2.type and op2.type == res.type - assert bitwidth == op2.data[0] and op1.data[0] == res.data[0] and op2.data[4] == res.data[4] - assert size0 == op2.data[1] and op2.data[1] == res.data[1] and size1 == op2.data[2] and op2.data[2] == res.data[2] - assert index_size0 == op2.data[3] and op2.data[3] == res.data[3] and index_size1 == op2.data[4] program = f""" decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}]; decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}]; @@ -85,14 +76,8 @@ def tensor2d_op(declaration): def tensor3d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, 
declaration.output.primitive - bitwidth, size0, size1, size2, = op1.data[0], op1.data[1], op1.data[2], op1.data[3] + bitwidth, size0, size1, size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] index_size0, index_size1, index_size2 = op1.data[4], op1.data[5], op1.data[6] - assert op1.type == PrimitiveType.Memory3D and op1.type == op2.type and op2.type == res.type - assert bitwidth == op2.data[0] and op1.data[0] == res.data[0] and op2.data[4] == res.data[4] - assert size0 == op2.data[1] and op2.data[1] == res.data[1] and size1 == op2.data[2] and op2.data[2] == res.data[2] - assert size2 == op2.data[3] and op2.data[3] == res.data[3] - assert index_size0 == op2.data[4] and op2.data[4] == res.data[4] and index_size1 == op2.data[5] - assert index_size2 == op2.data[6] and op2.data[6] == res.data[6] program = f""" decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; @@ -107,6 +92,26 @@ def tensor3d_op(declaration): return lower_dahlia_program(program, declaration.component_name) +def tensor4d_op(declaration): + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + bitwidth, size0, size1, size2, size3 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] + index_size0, index_size1, index_size2, index_size3 = op1.data[5], op1.data[6], op1.data[7], op1.data[8] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let k: ubit<{index_size2}> = 0..{size2}) {{ + for (let l: ubit<{index_size3}> = 0..{size3}) {{ + {res.name}[i][j][k][l] := {op1.name}[i][j][k][l] {declaration.op} 
{op2.name}[i][j][k][l]; + }} + }} + }} + }}""" + return lower_dahlia_program(program, declaration.component_name) + + def batch_flatten(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" op1, res = declaration.inputs[0].primitive, declaration.output.primitive @@ -114,10 +119,6 @@ def batch_flatten(declaration): op1_index_size0, op1_index_size1, op1_index_size2 = op1.data[4], op1.data[5], op1.data[6] res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] res_index_size0, res_index_size1 = res.data[3], res.data[4] - - assert op1.type == PrimitiveType.Memory3D and res_size1 == op1_size1 * op1_size2 and res_size0 == op1_size0 - assert res.type == PrimitiveType.Memory2D and res_bitwidth == bitwidth and op1.data_type == res.data_type - assert op1.data_type == res.data_type program = f""" decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; @@ -140,10 +141,6 @@ def tensor2d_bias_add(declaration): bitwidth = data.data[0] size0, size1, index_size0, index_size1 = data.data[1], data.data[2], data.data[3], data.data[4] bias_size, bias_index_size = bias.data[1], bias.data[2] - - assert bitwidth == res.data[0] and bitwidth == bias.data[0] - assert size0 == res.data[1] and size1 == res.data[2] and bias_size == size1 - assert bias.type == PrimitiveType.Memory1D and data.type == PrimitiveType.Memory2D and data.type == res.type program = f""" decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}]; decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; @@ -161,28 +158,58 @@ def tensor2d_bias_add(declaration): # 1. This won't work for fixed point currently, since Dahlia # will not take fixed point operands for the `>` operator. # 2. Without signed bit array support, this is also meaningless. 
-def tensor2d_relu(declaration): +def relu(declaration): op1, res = declaration.inputs[0].primitive, declaration.output.primitive assert res.data_type == 'ubit', f'{res.data_type} is not currently supported for ReLU.' - bitwidth, op1_size0, op1_size1 = op1.data[0], op1.data[1], op1.data[2] - op1_index_size0, op1_index_size1 = op1.data[3], op1.data[4] - res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] - res_index_size0, res_index_size1 = res.data[3], res.data[4] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; - let zero: {op1.data_type}<{bitwidth}> = 0; - for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ - for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ - if ({op1.name}[i][j] > zero) {{ - {res.name}[i][j] := {op1.name}[i][j]; - }} else {{ - {res.name}[i][j] := 0; + + if op1.type == PrimitiveType.Memory2D: + bitwidth, op1_size0, op1_size1 = op1.data[0], op1.data[1], op1.data[2] + op1_index_size0, op1_index_size1 = op1.data[3], op1.data[4] + res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] + res_index_size0, res_index_size1 = res.data[3], res.data[4] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; + let zero: {op1.data_type}<{bitwidth}> = 0; + for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ + for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ + if ({op1.name}[i][j] > zero) {{ + {res.name}[i][j] := {op1.name}[i][j]; + }} else {{ + {res.name}[i][j] := 0; + }} + }} }} - }} - }} - """ - return lower_dahlia_program(program, declaration.component_name) + """ + return lower_dahlia_program(program, declaration.component_name) + + elif op1.type == PrimitiveType.Memory4D: + bitwidth, op1_size0, op1_size1 = op1.data[0], op1.data[1], op1.data[2] + op1_size2, op1_size3, 
op1_index_size0, = op1.data[3], op1.data[4], op1.data[5] + op1_index_size1, op1_index_size2, op1_index_size3 = op1.data[6], op1.data[7], op1.data[8] + res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] + res_size2, res_size3, res_index_size0, res_index_size1 = res.data[3], res.data[4], res.data[5], res.data[6] + res_index_size2, res_index_size3 = res.data[7], res.data[8] + + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}][{op1_size3}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}][{op1_size2}][{op1_size3}]; + let zero: {op1.data_type}<{bitwidth}> = 0; + for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ + for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ + for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ + for (let l: ubit<{op1_index_size3}> = 0..{op1_size3}) {{ + if ({op1.name}[i][j][k][l] > zero) {{ + {res.name}[i][j][k][l] := {op1.name}[i][j][k][l]; + }} else {{ + {res.name}[i][j][k][l] := 0; + }} + }} + }} + }} + }} + """ + return lower_dahlia_program(program, declaration.component_name) def batch_matmul(declaration): @@ -192,9 +219,6 @@ def batch_matmul(declaration): M1_index_size0, M1_index_size1, M1_index_size2 = op1.data[4], op1.data[5], op1.data[6] M2_size0, M2_size1, M2_size2 = op2.data[1], op2.data[2], op2.data[3] M2_index_size0, M2_index_size1, M2_index_size2 = op2.data[4], op2.data[5], op2.data[6] - assert op1.type == PrimitiveType.Memory3D and op1.type == op2.type and op2.type == res.type - assert op1.data_type == op2.data_type and op2.data_type == res.data_type - # 1. Get transpose of second operand. # 2. Create temporary value `t`. Then, t = op1 * transpose(op2). # 3. 
Copy temporary value to return value.* diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 1d469c8888..5522ca22e7 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -10,6 +10,7 @@ class PrimitiveType(Enum): Memory1D = 3 Memory2D = 4 Memory3D = 5 + Memory4D = 6 class ControlType(Enum): diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 53eb47662d..6c65dea0dc 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -102,6 +102,17 @@ def pp_cell(cell: FCell): index_size2 = str(data[6]) return f'{cell.primitive.name} = prim std_mem_d3({bitwidth}, ' \ f'{size0}, {size1}, {size2}, {index_size0}, {index_size1}, {index_size2});' + if cell.primitive.type == PrimitiveType.Memory4D: + size0 = str(data[1]) + size1 = str(data[2]) + size2 = str(data[3]) + size3 = str(data[4]) + index_size0 = str(data[4]) + index_size1 = str(data[5]) + index_size2 = str(data[6]) + index_size3 = str(data[7]) + return f'{cell.primitive.name} = prim std_mem_d4({bitwidth}, ' \ + f'{size0}, {size1}, {size2}, {size3}, {index_size0}, {index_size1}, {index_size2}, {index_size3});' if cell.primitive.type == PrimitiveType.BinOp: op = data[1] return f'{cell.primitive.name} = prim std_{op}({bitwidth});' diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect index 098c7bcc1d..a0998dd229 100644 --- a/frontends/relay-futil/tests/relu.expect +++ b/frontends/relay-futil/tests/relu.expect @@ -1,22 +1,34 @@ import "primitives/std.lib"; -component tensor2d_relu(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, 
x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 2, x0_0_0_0_addr1: 3, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 6, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 2, x10_0_0_0_addr1: 3, x10_0_0_0_addr2: 4, x10_0_0_0_addr3: 6, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { - add0 = prim std_add(3); - add1 = prim std_add(2); + add0 = prim std_add(6); + add1 = prim std_add(4); + add2 = prim std_add(3); + add3 = prim std_add(2); const0 = prim std_const(32, 0); const1 = prim std_const(2, 0); + const10 = prim std_const(6, 1); + const11 = prim std_const(4, 1); + const12 = prim std_const(3, 1); + const13 = prim std_const(2, 1); const2 = prim std_const(2, 1); const3 = prim std_const(3, 0); const4 = prim std_const(3, 3); - const5 = prim std_const(32, 0); - const6 = prim std_const(3, 1); - const7 = prim std_const(2, 1); + const5 = prim std_const(4, 0); + const6 = prim std_const(4, 7); + const7 = prim std_const(6, 0); + const8 = prim std_const(6, 31); + const9 = prim std_const(32, 0); gt0 = prim std_gt(32); i0 = prim std_reg(2); j0 = prim std_reg(3); + k0 = prim std_reg(4); + l0 = prim std_reg(6); le0 = prim std_le(2); le1 = prim std_le(3); + le2 = prim std_le(4); + le3 = prim std_le(6); x_read0_0 = prim std_reg(32); x_read1_0 = prim std_reg(32); zero_0 = prim std_reg(32); @@ -34,6 +46,16 @@ component tensor2d_relu(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_r } group cond2<"static"=0> { cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const6.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = l0.out; + le3.right = const8.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; gt0.left = x_read0_0.out; gt0.right = zero_0.out; } @@ -52,47 +74,79 @@ component tensor2d_relu(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_r j0.write_en = 1'd1; let2[done] = j0.done; } + group let3<"static"=1> { + k0.in = const5.out; + k0.write_en = 1'd1; + let3[done] = 
k0.done; + } + group let4<"static"=1> { + l0.in = const7.out; + l0.write_en = 1'd1; + let4[done] = l0.done; + } group upd0<"static"=1> { x_read0_0.write_en = 1'd1; - x0_0_addr1 = j0.out; - x0_0_addr0 = i0.out; - x_read0_0.in = 1'd1 ? x0_0_read_data; + x0_0_0_0_addr3 = l0.out; + x0_0_0_0_addr2 = k0.out; + x0_0_0_0_addr1 = j0.out; + x0_0_0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_0_0_read_data; upd0[done] = x_read0_0.done ? 1'd1; } group upd1<"static"=1> { x_read1_0.write_en = 1'd1; - x0_0_addr1 = j0.out; - x0_0_addr0 = i0.out; - x_read1_0.in = 1'd1 ? x0_0_read_data; + x0_0_0_0_addr3 = l0.out; + x0_0_0_0_addr2 = k0.out; + x0_0_0_0_addr1 = j0.out; + x0_0_0_0_addr0 = i0.out; + x_read1_0.in = 1'd1 ? x0_0_0_0_read_data; upd1[done] = x_read1_0.done ? 1'd1; } group upd2<"static"=1> { - x10_0_addr1 = j0.out; - x10_0_addr0 = i0.out; - x10_0_write_en = 1'd1; - x10_0_write_data = 1'd1 ? x_read1_0.out; - upd2[done] = x10_0_done ? 1'd1; + x10_0_0_0_addr3 = l0.out; + x10_0_0_0_addr2 = k0.out; + x10_0_0_0_addr1 = j0.out; + x10_0_0_0_addr0 = i0.out; + x10_0_0_0_write_en = 1'd1; + x10_0_0_0_write_data = 1'd1 ? x_read1_0.out; + upd2[done] = x10_0_0_0_done ? 1'd1; } group upd3<"static"=1> { - x10_0_addr1 = j0.out; - x10_0_addr0 = i0.out; - x10_0_write_en = 1'd1; - x10_0_write_data = 1'd1 ? const5.out; - upd3[done] = x10_0_done ? 1'd1; + x10_0_0_0_addr3 = l0.out; + x10_0_0_0_addr2 = k0.out; + x10_0_0_0_addr1 = j0.out; + x10_0_0_0_addr0 = i0.out; + x10_0_0_0_write_en = 1'd1; + x10_0_0_0_write_data = 1'd1 ? const9.out; + upd3[done] = x10_0_0_0_done ? 1'd1; } group upd4<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const6.out; - j0.in = 1'd1 ? add0.out; - upd4[done] = j0.done ? 1'd1; + l0.write_en = 1'd1; + add0.left = l0.out; + add0.right = const10.out; + l0.in = 1'd1 ? add0.out; + upd4[done] = l0.done ? 1'd1; } group upd5<"static"=1> { + k0.write_en = 1'd1; + add1.left = k0.out; + add1.right = const11.out; + k0.in = 1'd1 ? 
add1.out; + upd5[done] = k0.done ? 1'd1; + } + group upd6<"static"=1> { + j0.write_en = 1'd1; + add2.left = j0.out; + add2.right = const12.out; + j0.in = 1'd1 ? add2.out; + upd6[done] = j0.done ? 1'd1; + } + group upd7<"static"=1> { i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const7.out; - i0.in = 1'd1 ? add1.out; - upd5[done] = i0.done ? 1'd1; + add3.left = i0.out; + add3.right = const13.out; + i0.in = 1'd1 ? add3.out; + upd7[done] = i0.done ? 1'd1; } } @@ -105,19 +159,31 @@ component tensor2d_relu(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_r let2; while le1.out with cond1 { seq { - upd0; - if gt0.out with cond2 { + let3; + while le2.out with cond2 { seq { - upd1; - upd2; + let4; + while le3.out with cond3 { + seq { + upd0; + if gt0.out with cond4 { + seq { + upd1; + upd2; + } + } else { + upd3; + } + upd4; + } + } + upd5; } - } else { - upd3; } - upd4; + upd6; } } - upd5; + upd7; } } } @@ -126,27 +192,27 @@ component tensor2d_relu(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_r component main () -> () { cells { - x1 = prim std_mem_d2(32, 2, 4, 2, 3); - x = prim std_mem_d2(32, 2, 4, 2, 3); - tensor2d_relu0 = tensor2d_relu; + x1 = prim std_mem_d4(32, 2, 4, 8, 32, 32, 2, 3, 4); + x = prim std_mem_d4(32, 2, 4, 8, 32, 32, 2, 3, 4); + relu0 = relu; } wires { - group run_tensor2d_relu { - x.addr0 = tensor2d_relu0.x0_0_addr0; - tensor2d_relu0.x0_0_read_data = x.read_data; - x.addr1 = tensor2d_relu0.x0_0_addr1; - x1.addr0 = tensor2d_relu0.x10_0_addr0; - x1.addr1 = tensor2d_relu0.x10_0_addr1; - x1.write_data = tensor2d_relu0.x10_0_write_data; - x1.write_en = tensor2d_relu0.x10_0_write_en; - tensor2d_relu0.x10_0_done = x1.done; - tensor2d_relu0.go = 1'd1; - run_tensor2d_relu[done] = tensor2d_relu0.done ? 
1'd1; + group run_relu { + x.addr0 = relu0.x0_0_0_0_addr0; + relu0.x0_0_0_0_read_data = x.read_data; + x.addr1 = relu0.x0_0_0_0_addr1; + x.addr2 = relu0.x0_0_0_0_addr2; + x1.addr0 = relu0.x10_0_0_0_addr0; + x1.write_data = relu0.x10_0_0_0_write_data; + x1.write_en = relu0.x10_0_0_0_write_en; + relu0.x10_0_0_0_done = x1.done; + relu0.go = 1'd1; + run_relu[done] = relu0.done ? 1'd1; } } control { seq { - run_tensor2d_relu; + run_relu; } } } diff --git a/frontends/relay-futil/tests/relu.relay b/frontends/relay-futil/tests/relu.relay index f8f324a033..fd5278c4a8 100644 --- a/frontends/relay-futil/tests/relu.relay +++ b/frontends/relay-futil/tests/relu.relay @@ -1,6 +1,6 @@ v0.0.4 -fn (%x: Tensor[(2, 4), int32]) { - let %x1: Tensor[(2, 4), int32] = nn.relu(%x); +fn (%x: Tensor[(2, 4, 8, 32), int32]) { + let %x1: Tensor[(2, 4, 8, 32), int32] = nn.relu(%x); %x1 } diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 8deeb217c1..39a4591af1 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -67,6 +67,7 @@ def get_memory_parameters(type): if len(tensor_dimensions) == 1: primitive_type = PrimitiveType.Memory1D if len(tensor_dimensions) == 2: primitive_type = PrimitiveType.Memory2D if len(tensor_dimensions) == 3: primitive_type = PrimitiveType.Memory3D + if len(tensor_dimensions) == 4: primitive_type = PrimitiveType.Memory4D return data, primitive_type, data_type From 40db68f0ae901b3dd74548a7c7cec0efca339833 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 15:21:35 -0500 Subject: [PATCH 19/75] Remove comma. 
--- primitives/std.lib | 75 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/primitives/std.lib b/primitives/std.lib index 805384ceee..7f4d887efa 100644 --- a/primitives/std.lib +++ b/primitives/std.lib @@ -158,6 +158,81 @@ primitive std_mem_d3_ext[ clk: 1 ) -> (read_data: width, done: 1) {} +primitive std_mem_d4[ + width, + d0_size, + d1_size, + d2_size, + d3_size, + d0_idx_size, + d1_idx_size, + d2_idx_size, + d3_idx_size +] ( + addr0: d0_idx_size, + addr1: d1_idx_size, + addr2: d2_idx_size, + addr3: d3_idx_size, + write_data: width, + write_en: 1, + clk: 1 +) -> (read_data: width, done: 1) { + verilog { + module std_mem_d4 + #(parameter width = 32, + parameter d0_size = 16, + parameter d1_size = 16, + parameter d2_size = 16, + parameter d3_size = 16, + parameter d0_idx_size = 4, + parameter d1_idx_size = 4, + parameter d2_idx_size = 4, + parameter d3_idx_size = 4,) + (input logic [d0_idx_size-1:0] addr0, + input logic [d1_idx_size-1:0] addr1, + input logic [d2_idx_size-1:0] addr2, + input logic [d3_idx_size-1:0] addr3, + input logic [width-1:0] write_data, + input logic write_en, + input logic clk, + output logic [width-1:0] read_data, + output logic done); + + /* verilator lint_off WIDTH */ + logic [width-1:0] mem[d0_size-1:0][d1_size-1:0][d2_size-1:0][d3_size-1:0]; + + assign read_data = mem[addr0][addr1][addr2][addr3]; + always_ff @(posedge clk) begin + if (write_en) begin + mem[addr0][addr1][addr2][addr3] <= write_data; + done <= 1'd1; + end else + done <= 1'd0; + end + endmodule + } +} + +primitive std_mem_d4_ext[ + width, + d0_size, + d1_size, + d2_size, + d3_size, + d0_idx_size, + d1_idx_size, + d2_idx_size, + d3_idx_size +] ( + addr0: d0_idx_size, + addr1: d1_idx_size, + addr2: d2_idx_size, + addr3: d3_idx_size, + write_data: width, + write_en: 1, + clk: 1 +) -> (read_data: width, done: 1) {} + primitive std_logsize_mem_d1[width, logsize]( addr0: logsize, write_data: width, From 
63535b2241acc85ab0e838a604975e2a9f37ea30 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 20:20:03 -0500 Subject: [PATCH 20/75] Add negative, non-working expand_dims. --- frontends/relay-futil/compiler.py | 51 +++++------ frontends/relay-futil/dahlia_functions.py | 35 +++++++- frontends/relay-futil/example.py | 8 +- frontends/relay-futil/futil_ast.py | 12 +-- frontends/relay-futil/tests/add.expect | 28 +++---- frontends/relay-futil/tests/bias_add.expect | 32 +++---- .../relay-futil/tests/expand_dims.expect | 0 frontends/relay-futil/tests/expand_dims.relay | 6 ++ .../relay-futil/tests/fixed_point_add.expect | 28 +++---- frontends/relay-futil/tests/let1.expect | 28 +++---- frontends/relay-futil/tests/let2.expect | 58 ++++++------- frontends/relay-futil/tests/let3.expect | 84 +++++++++---------- frontends/relay-futil/tests/sub.expect | 28 +++---- .../relay-futil/tests/tensor1d_mult.expect | 28 +++---- .../relay-futil/tests/tensor2d_add.expect | 34 ++++---- .../relay-futil/tests/tensor3d_divide.expect | 40 ++++----- 16 files changed, 268 insertions(+), 232 deletions(-) create mode 100644 frontends/relay-futil/tests/expand_dims.expect create mode 100644 frontends/relay-futil/tests/expand_dims.relay diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index dc5c47df93..9f97f7ac11 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -9,7 +9,15 @@ from dahlia_functions import * # Mapping from Relay binary calls to the respective Dahlia operator. -BuiltInBinaryCalls = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} +BuiltInBinaryOps = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} + +# Mapping from Tensor dimensions to function type. 
+BinaryOpTensorDimensions = {PrimitiveType.Memory1D: tensor1d_op, PrimitiveType.Memory2D: tensor2d_op, + PrimitiveType.Memory3D: tensor3d_op, PrimitiveType.Memory4D: tensor4d_op} + +# Mapping from Relay function names to their respective Dahlia lowering. +RelayFunctionCalls = {'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, + 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims} class Relay2Futil(ExprFunctor): @@ -52,12 +60,11 @@ def produce_dahlia_name(self, name, type): Memory2D: 'X0_0', 'X1_0', 'X2_0', ... Memory3D: 'X0_0_0', 'X1_0_0', 'X2_0_0', ... """ + DahliaNameMapping = {PrimitiveType.Memory1D: '', PrimitiveType.Memory2D: '_0', + PrimitiveType.Memory3D: '_0_0', PrimitiveType.Memory4D: '_0_0_0'} dahlia_name = self.id(name) - if type == PrimitiveType.Memory1D: return dahlia_name - if type == PrimitiveType.Memory2D: return dahlia_name + "_0" - if type == PrimitiveType.Memory3D: return dahlia_name + "_0_0" - if type == PrimitiveType.Memory4D: return dahlia_name + "_0_0_0" - assert False, f'{name} with {type} is not supported yet.' + assert type in DahliaNameMapping, f'{name} with {type} is not supported yet.' 
+ return DahliaNameMapping[type] def get_dahlia_declaration(self, function_name, cells, args): """ @@ -66,30 +73,14 @@ def get_dahlia_declaration(self, function_name, cells, args): """ input_type = cells[0].primitive.type function = name = op = None - - if function_name in BuiltInBinaryCalls: - op = BuiltInBinaryCalls[function_name] - if input_type == PrimitiveType.Memory1D: - function, name = tensor1d_op, f'tensor1d_{function_name}' - elif input_type == PrimitiveType.Memory2D: - function, name = tensor2d_op, f'tensor2d_{function_name}' - elif input_type == PrimitiveType.Memory3D: - function, name = tensor3d_op, f'tensor3d_{function_name}' - elif input_type == PrimitiveType.Memory4D: - function, name = tensor4d_op, f'tensor4d_{function_name}' - - if function_name == "nn.batch_flatten": - if input_type == PrimitiveType.Memory3D: function = batch_flatten - elif function_name == "nn.batch_matmul": - function = batch_matmul - elif function_name == "nn.bias_add": - if input_type == PrimitiveType.Memory2D: function = tensor2d_bias_add - elif function_name == "nn.relu": - assert input_type == PrimitiveType.Memory2D or input_type == PrimitiveType.Memory4D - function = relu - - assert function != None, f'{function_name} with type {input_type} is not supported.' - if name == None: name = function.__name__ + if function_name in BuiltInBinaryOps: + op = BuiltInBinaryOps[function_name] + function, name = BinaryOpTensorDimensions[input_type], function_name + elif function_name in RelayFunctionCalls: + function = RelayFunctionCalls[function_name] + name = function.__name__ + else: + assert False, f'{function_name} with type {input_type} is not supported.' 
return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), op=op, inputs=args, function=function) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index ac08cca3af..50dd7100d4 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -91,6 +91,7 @@ def tensor3d_op(declaration): }}""" return lower_dahlia_program(program, declaration.component_name) + def tensor4d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth, size0, size1, size2, size3 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] @@ -133,7 +134,7 @@ def batch_flatten(declaration): return lower_dahlia_program(program, declaration.component_name) -def tensor2d_bias_add(declaration): +def bias_add(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" # Assumes default value axis=1 is passed in. data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive @@ -158,6 +159,7 @@ def tensor2d_bias_add(declaration): # will not take fixed point operands for the `>` operator. # 2. Without signed bit array support, this is also meaningless. def relu(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" op1, res = declaration.inputs[0].primitive, declaration.output.primitive assert res.data_type == 'ubit', f'{res.data_type} is not currently supported for ReLU.' @@ -211,6 +213,37 @@ def relu(declaration): return lower_dahlia_program(program, declaration.component_name) +# TODO(cgyurgyik): Similar to ReLU, this requires signed operands. 
+def negative(declaration): + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" + op1, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; + for (let i: ubit<{index_size}> = 0..{size}) {{ + {res.name}[i] := -{op1.name}[i]; + }} + """ + return lower_dahlia_program(program, declaration.component_name) + + +def expand_dims(declaration): + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" + op1, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] + size0, size1, size2 = res.data[1], res.data[2], res.data[3] + index_size0, index_size1, index_size2 = res.data[4], res.data[5], res.data[6] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; + for (let i: ubit<{index_size}> = 0..{size}) {{ + {res.name}[i][0][0] := {op1.name}[i]; + }} + """ + return lower_dahlia_program(program, declaration.component_name) + + def batch_matmul(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 3531e082b7..779493b1a4 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -16,6 +16,11 @@ def tensor_subtract(): return relay.Function([x, y], relay.subtract(x, y)) +def expand_dims(): + x = relay.var('x', shape=[4], dtype='int32') + return relay.Function([x], relay.expand_dims(x, axis=1, num_newaxis=2)) + + def batch_flatten(): x = relay.var("x", relay.TensorType((2, 5, 
5), "int32")) return relay.Function([x], relay.nn.batch_flatten(x)) @@ -37,6 +42,7 @@ def relu(): x = relay.var('x', shape=[2, 4], dtype='int32') return relay.Function([x], relay.nn.relu(x)) + def mlp_net(): """The MLP test from Relay.""" from tvm.relay.testing import mlp @@ -50,7 +56,7 @@ def vgg_net(): batch_norm=True) -ALL_FUNCS = [add, tensor_subtract, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] +ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 5522ca22e7..868e6ffbcf 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -5,12 +5,12 @@ class PrimitiveType(Enum): - Register = 1 - Constant = 2 - Memory1D = 3 - Memory2D = 4 - Memory3D = 5 - Memory4D = 6 + Memory1D = 1 + Memory2D = 2 + Memory3D = 3 + Memory4D = 4 + Register = 5 + Constant = 6 class ControlType(Enum): diff --git a/frontends/relay-futil/tests/add.expect b/frontends/relay-futil/tests/add.expect index a67d257997..f239d18b42 100644 --- a/frontends/relay-futil/tests/add.expect +++ b/frontends/relay-futil/tests/add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(1); @@ 
-74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - tensor1d_add0 = tensor1d_add; + add0 = add; } wires { - group run_tensor1d_add { - x.addr0 = tensor1d_add0.x0_addr0; - tensor1d_add0.x0_read_data = x.read_data; - y.addr0 = tensor1d_add0.y0_addr0; - tensor1d_add0.y0_read_data = y.read_data; - z.addr0 = tensor1d_add0.z0_addr0; - z.write_data = tensor1d_add0.z0_write_data; - z.write_en = tensor1d_add0.z0_write_en; - tensor1d_add0.z0_done = z.done; - tensor1d_add0.go = 1'd1; - run_tensor1d_add[done] = tensor1d_add0.done ? 1'd1; + group run_add { + x.addr0 = add0.x0_addr0; + add0.x0_read_data = x.read_data; + y.addr0 = add0.y0_addr0; + add0.y0_read_data = y.read_data; + z.addr0 = add0.z0_addr0; + z.write_data = add0.z0_write_data; + z.write_en = add0.z0_write_en; + add0.z0_done = z.done; + add0.go = 1'd1; + run_add[done] = add0.done ? 1'd1; } } control { seq { - run_tensor1d_add; + run_add; } } } diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect index ee66f8942d..08566e1f14 100644 --- a/frontends/relay-futil/tests/bias_add.expect +++ b/frontends/relay-futil/tests/bias_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor2d_bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 7, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 7, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 7, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, 
x10_0_addr0: 1, x10_0_addr1: 7, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(7); @@ -105,27 +105,27 @@ component main () -> () { x1 = prim std_mem_d2(32, 1, 64, 1, 7); x = prim std_mem_d2(32, 1, 64, 1, 7); bias = prim std_mem_d1(32, 64, 7); - tensor2d_bias_add0 = tensor2d_bias_add; + bias_add0 = bias_add; } wires { - group run_tensor2d_bias_add { - x.addr0 = tensor2d_bias_add0.x0_0_addr0; - tensor2d_bias_add0.x0_0_read_data = x.read_data; - x.addr1 = tensor2d_bias_add0.x0_0_addr1; - bias.addr0 = tensor2d_bias_add0.bias0_addr0; - tensor2d_bias_add0.bias0_read_data = bias.read_data; - x1.addr0 = tensor2d_bias_add0.x10_0_addr0; - x1.addr1 = tensor2d_bias_add0.x10_0_addr1; - x1.write_data = tensor2d_bias_add0.x10_0_write_data; - x1.write_en = tensor2d_bias_add0.x10_0_write_en; - tensor2d_bias_add0.x10_0_done = x1.done; - tensor2d_bias_add0.go = 1'd1; - run_tensor2d_bias_add[done] = tensor2d_bias_add0.done ? 1'd1; + group run_bias_add { + x.addr0 = bias_add0.x0_0_addr0; + bias_add0.x0_0_read_data = x.read_data; + x.addr1 = bias_add0.x0_0_addr1; + bias.addr0 = bias_add0.bias0_addr0; + bias_add0.bias0_read_data = bias.read_data; + x1.addr0 = bias_add0.x10_0_addr0; + x1.addr1 = bias_add0.x10_0_addr1; + x1.write_data = bias_add0.x10_0_write_data; + x1.write_en = bias_add0.x10_0_write_en; + bias_add0.x10_0_done = x1.done; + bias_add0.go = 1'd1; + run_bias_add[done] = bias_add0.done ? 
1'd1; } } control { seq { - run_tensor2d_bias_add; + run_bias_add; } } } diff --git a/frontends/relay-futil/tests/expand_dims.expect b/frontends/relay-futil/tests/expand_dims.expect new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontends/relay-futil/tests/expand_dims.relay b/frontends/relay-futil/tests/expand_dims.relay new file mode 100644 index 0000000000..47ae5ce31a --- /dev/null +++ b/frontends/relay-futil/tests/expand_dims.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(4), int32]) { + let %x1 = expand_dims(%x, axis=1, num_newaxis=2); + %x1 +} + diff --git a/frontends/relay-futil/tests/fixed_point_add.expect b/frontends/relay-futil/tests/fixed_point_add.expect index 3a08f950e8..aa8240b4cf 100644 --- a/frontends/relay-futil/tests/fixed_point_add.expect +++ b/frontends/relay-futil/tests/fixed_point_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(1); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - tensor1d_add0 = tensor1d_add; + add0 = add; } wires { - group run_tensor1d_add { - x.addr0 = tensor1d_add0.x0_addr0; - tensor1d_add0.x0_read_data = x.read_data; - y.addr0 = tensor1d_add0.y0_addr0; - tensor1d_add0.y0_read_data = y.read_data; - z.addr0 = 
tensor1d_add0.z0_addr0; - z.write_data = tensor1d_add0.z0_write_data; - z.write_en = tensor1d_add0.z0_write_en; - tensor1d_add0.z0_done = z.done; - tensor1d_add0.go = 1'd1; - run_tensor1d_add[done] = tensor1d_add0.done ? 1'd1; + group run_add { + x.addr0 = add0.x0_addr0; + add0.x0_read_data = x.read_data; + y.addr0 = add0.y0_addr0; + add0.y0_read_data = y.read_data; + z.addr0 = add0.z0_addr0; + z.write_data = add0.z0_write_data; + z.write_en = add0.z0_write_en; + add0.z0_done = z.done; + add0.go = 1'd1; + run_add[done] = add0.done ? 1'd1; } } control { seq { - run_tensor1d_add; + run_add; } } } diff --git a/frontends/relay-futil/tests/let1.expect b/frontends/relay-futil/tests/let1.expect index 77312716ef..cf228003ae 100644 --- a/frontends/relay-futil/tests/let1.expect +++ b/frontends/relay-futil/tests/let1.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -82,25 +82,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); b = prim std_mem_d1(32, 1, 1); - tensor1d_multiply0 = tensor1d_multiply; + multiply0 = multiply; } wires { - group run_tensor1d_multiply { - a.addr0 = tensor1d_multiply0.a0_addr0; - tensor1d_multiply0.a0_read_data = a.read_data; - b.addr0 = tensor1d_multiply0.b0_addr0; - tensor1d_multiply0.b0_read_data = 
b.read_data; - z.addr0 = tensor1d_multiply0.z0_addr0; - z.write_data = tensor1d_multiply0.z0_write_data; - z.write_en = tensor1d_multiply0.z0_write_en; - tensor1d_multiply0.z0_done = z.done; - tensor1d_multiply0.go = 1'd1; - run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 1'd1; + group run_multiply { + a.addr0 = multiply0.a0_addr0; + multiply0.a0_read_data = a.read_data; + b.addr0 = multiply0.b0_addr0; + multiply0.b0_read_data = b.read_data; + z.addr0 = multiply0.z0_addr0; + z.write_data = multiply0.z0_write_data; + z.write_en = multiply0.z0_write_en; + multiply0.z0_done = z.done; + multiply0.go = 1'd1; + run_multiply[done] = multiply0.done ? 1'd1; } } control { seq { - run_tensor1d_multiply; + run_multiply; } } } diff --git a/frontends/relay-futil/tests/let2.expect b/frontends/relay-futil/tests/let2.expect index 88da5412ff..b9a9bfd9ec 100644 --- a/frontends/relay-futil/tests/let2.expect +++ b/frontends/relay-futil/tests/let2.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { +component add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(32); @@ -68,7 +68,7 @@ component tensor1d_add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data } } } -component tensor1d_multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, 
a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { +component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -150,40 +150,40 @@ component main () -> () { d = prim std_mem_d1(32, 1, 1); c = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); - tensor1d_add0 = tensor1d_add; + add0 = add; b = prim std_mem_d1(32, 1, 1); - tensor1d_multiply0 = tensor1d_multiply; + multiply0 = multiply; } wires { - group run_tensor1d_multiply { - a.addr0 = tensor1d_multiply0.a0_addr0; - tensor1d_multiply0.a0_read_data = a.read_data; - b.addr0 = tensor1d_multiply0.b0_addr0; - tensor1d_multiply0.b0_read_data = b.read_data; - c.addr0 = tensor1d_multiply0.c0_addr0; - c.write_data = tensor1d_multiply0.c0_write_data; - c.write_en = tensor1d_multiply0.c0_write_en; - tensor1d_multiply0.c0_done = c.done; - tensor1d_multiply0.go = 1'd1; - run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 1'd1; - } - group run_tensor1d_add { - c.addr0 = tensor1d_add0.c0_addr0; - tensor1d_add0.c0_read_data = c.read_data; - a.addr0 = tensor1d_add0.a0_addr0; - tensor1d_add0.a0_read_data = a.read_data; - d.addr0 = tensor1d_add0.d0_addr0; - d.write_data = tensor1d_add0.d0_write_data; - d.write_en = tensor1d_add0.d0_write_en; - tensor1d_add0.d0_done = d.done; - tensor1d_add0.go = 1'd1; - run_tensor1d_add[done] = tensor1d_add0.done ? 
1'd1; + group run_multiply { + a.addr0 = multiply0.a0_addr0; + multiply0.a0_read_data = a.read_data; + b.addr0 = multiply0.b0_addr0; + multiply0.b0_read_data = b.read_data; + c.addr0 = multiply0.c0_addr0; + c.write_data = multiply0.c0_write_data; + c.write_en = multiply0.c0_write_en; + multiply0.c0_done = c.done; + multiply0.go = 1'd1; + run_multiply[done] = multiply0.done ? 1'd1; + } + group run_add { + c.addr0 = add0.c0_addr0; + add0.c0_read_data = c.read_data; + a.addr0 = add0.a0_addr0; + add0.a0_read_data = a.read_data; + d.addr0 = add0.d0_addr0; + d.write_data = add0.d0_write_data; + d.write_en = add0.d0_write_en; + add0.d0_done = d.done; + add0.go = 1'd1; + run_add[done] = add0.done ? 1'd1; } } control { seq { - run_tensor1d_multiply; - run_tensor1d_add; + run_multiply; + run_add; } } } diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index 3ef8c350f2..11b79b4180 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { +component multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { cells { add0 = prim std_add(1); bin_read0_0 = prim std_reg(32); @@ -76,7 +76,7 @@ component tensor1d_multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read } } } -component tensor1d_divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 
32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { +component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -152,7 +152,7 @@ component tensor1d_divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_d } } } -component tensor1d_subtract(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { +component subtract(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -226,55 +226,55 @@ component main () -> () { e = prim std_mem_d1(32, 1, 1); c = prim std_mem_d1(32, 1, 1); d = prim std_mem_d1(32, 1, 1); - tensor1d_multiply0 = tensor1d_multiply; + multiply0 = multiply; a = prim std_mem_d1(32, 1, 1); - tensor1d_divide0 = tensor1d_divide; + divide0 = divide; b = prim std_mem_d1(32, 1, 1); - tensor1d_subtract0 = tensor1d_subtract; + subtract0 = subtract; } wires { - group run_tensor1d_subtract { - a.addr0 = tensor1d_subtract0.a0_addr0; - tensor1d_subtract0.a0_read_data = a.read_data; - b.addr0 = tensor1d_subtract0.b0_addr0; - tensor1d_subtract0.b0_read_data = 
b.read_data; - c.addr0 = tensor1d_subtract0.c0_addr0; - c.write_data = tensor1d_subtract0.c0_write_data; - c.write_en = tensor1d_subtract0.c0_write_en; - tensor1d_subtract0.c0_done = c.done; - tensor1d_subtract0.go = 1'd1; - run_tensor1d_subtract[done] = tensor1d_subtract0.done ? 1'd1; + group run_subtract { + a.addr0 = subtract0.a0_addr0; + subtract0.a0_read_data = a.read_data; + b.addr0 = subtract0.b0_addr0; + subtract0.b0_read_data = b.read_data; + c.addr0 = subtract0.c0_addr0; + c.write_data = subtract0.c0_write_data; + c.write_en = subtract0.c0_write_en; + subtract0.c0_done = c.done; + subtract0.go = 1'd1; + run_subtract[done] = subtract0.done ? 1'd1; } - group run_tensor1d_divide { - c.addr0 = tensor1d_divide0.c0_addr0; - tensor1d_divide0.c0_read_data = c.read_data; - a.addr0 = tensor1d_divide0.a0_addr0; - tensor1d_divide0.a0_read_data = a.read_data; - d.addr0 = tensor1d_divide0.d0_addr0; - d.write_data = tensor1d_divide0.d0_write_data; - d.write_en = tensor1d_divide0.d0_write_en; - tensor1d_divide0.d0_done = d.done; - tensor1d_divide0.go = 1'd1; - run_tensor1d_divide[done] = tensor1d_divide0.done ? 1'd1; + group run_divide { + c.addr0 = divide0.c0_addr0; + divide0.c0_read_data = c.read_data; + a.addr0 = divide0.a0_addr0; + divide0.a0_read_data = a.read_data; + d.addr0 = divide0.d0_addr0; + d.write_data = divide0.d0_write_data; + d.write_en = divide0.d0_write_en; + divide0.d0_done = d.done; + divide0.go = 1'd1; + run_divide[done] = divide0.done ? 1'd1; } - group run_tensor1d_multiply { - c.addr0 = tensor1d_multiply0.c0_addr0; - tensor1d_multiply0.c0_read_data = c.read_data; - d.addr0 = tensor1d_multiply0.d0_addr0; - tensor1d_multiply0.d0_read_data = d.read_data; - e.addr0 = tensor1d_multiply0.e0_addr0; - e.write_data = tensor1d_multiply0.e0_write_data; - e.write_en = tensor1d_multiply0.e0_write_en; - tensor1d_multiply0.e0_done = e.done; - tensor1d_multiply0.go = 1'd1; - run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 
1'd1; + group run_multiply { + c.addr0 = multiply0.c0_addr0; + multiply0.c0_read_data = c.read_data; + d.addr0 = multiply0.d0_addr0; + multiply0.d0_read_data = d.read_data; + e.addr0 = multiply0.e0_addr0; + e.write_data = multiply0.e0_write_data; + e.write_en = multiply0.e0_write_en; + multiply0.e0_done = e.done; + multiply0.go = 1'd1; + run_multiply[done] = multiply0.done ? 1'd1; } } control { seq { - run_tensor1d_subtract; - run_tensor1d_divide; - run_tensor1d_multiply; + run_subtract; + run_divide; + run_multiply; } } } diff --git a/frontends/relay-futil/tests/sub.expect b/frontends/relay-futil/tests/sub.expect index 9cac092744..c74af4fb2e 100644 --- a/frontends/relay-futil/tests/sub.expect +++ b/frontends/relay-futil/tests/sub.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_subtract(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component subtract(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim std_add(1); const0 = prim std_const(1, 0); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - tensor1d_subtract0 = tensor1d_subtract; + subtract0 = subtract; } wires { - group run_tensor1d_subtract { - x.addr0 = tensor1d_subtract0.x0_addr0; - tensor1d_subtract0.x0_read_data = x.read_data; - y.addr0 = tensor1d_subtract0.y0_addr0; - tensor1d_subtract0.y0_read_data = y.read_data; - z.addr0 = tensor1d_subtract0.z0_addr0; - z.write_data = 
tensor1d_subtract0.z0_write_data; - z.write_en = tensor1d_subtract0.z0_write_en; - tensor1d_subtract0.z0_done = z.done; - tensor1d_subtract0.go = 1'd1; - run_tensor1d_subtract[done] = tensor1d_subtract0.done ? 1'd1; + group run_subtract { + x.addr0 = subtract0.x0_addr0; + subtract0.x0_read_data = x.read_data; + y.addr0 = subtract0.y0_addr0; + subtract0.y0_read_data = y.read_data; + z.addr0 = subtract0.z0_addr0; + z.write_data = subtract0.z0_write_data; + z.write_en = subtract0.z0_write_en; + subtract0.z0_done = z.done; + subtract0.go = 1'd1; + run_subtract[done] = subtract0.done ? 1'd1; } } control { seq { - run_tensor1d_subtract; + run_subtract; } } } diff --git a/frontends/relay-futil/tests/tensor1d_mult.expect b/frontends/relay-futil/tests/tensor1d_mult.expect index d6086cd33d..dac0e76d85 100644 --- a/frontends/relay-futil/tests/tensor1d_mult.expect +++ b/frontends/relay-futil/tests/tensor1d_mult.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor1d_multiply(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { +component multiply(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { cells { add0 = prim std_add(3); bin_read0_0 = prim std_reg(32); @@ -82,25 +82,25 @@ component main () -> () { x1 = prim std_mem_d1(32, 4, 3); x = prim std_mem_d1(32, 4, 3); y = prim std_mem_d1(32, 4, 3); - tensor1d_multiply0 = tensor1d_multiply; + multiply0 = multiply; } wires { - group run_tensor1d_multiply { - x.addr0 = tensor1d_multiply0.x0_addr0; - 
tensor1d_multiply0.x0_read_data = x.read_data; - y.addr0 = tensor1d_multiply0.y0_addr0; - tensor1d_multiply0.y0_read_data = y.read_data; - x1.addr0 = tensor1d_multiply0.x10_addr0; - x1.write_data = tensor1d_multiply0.x10_write_data; - x1.write_en = tensor1d_multiply0.x10_write_en; - tensor1d_multiply0.x10_done = x1.done; - tensor1d_multiply0.go = 1'd1; - run_tensor1d_multiply[done] = tensor1d_multiply0.done ? 1'd1; + group run_multiply { + x.addr0 = multiply0.x0_addr0; + multiply0.x0_read_data = x.read_data; + y.addr0 = multiply0.y0_addr0; + multiply0.y0_read_data = y.read_data; + x1.addr0 = multiply0.x10_addr0; + x1.write_data = multiply0.x10_write_data; + x1.write_en = multiply0.x10_write_en; + multiply0.x10_done = x1.done; + multiply0.go = 1'd1; + run_multiply[done] = multiply0.done ? 1'd1; } } control { seq { - run_tensor1d_multiply; + run_multiply; } } } diff --git a/frontends/relay-futil/tests/tensor2d_add.expect b/frontends/relay-futil/tests/tensor2d_add.expect index 46db3a2cab..d289badb27 100644 --- a/frontends/relay-futil/tests/tensor2d_add.expect +++ b/frontends/relay-futil/tests/tensor2d_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor2d_add(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { +component add(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { cells { add0 = 
prim std_add(32); add1 = prim std_add(3); @@ -106,28 +106,28 @@ component main () -> () { x1 = prim std_mem_d2(32, 2, 4, 2, 3); x = prim std_mem_d2(32, 2, 4, 2, 3); y = prim std_mem_d2(32, 2, 4, 2, 3); - tensor2d_add0 = tensor2d_add; + add0 = add; } wires { - group run_tensor2d_add { - x.addr0 = tensor2d_add0.x0_0_addr0; - tensor2d_add0.x0_0_read_data = x.read_data; - x.addr1 = tensor2d_add0.x0_0_addr1; - y.addr0 = tensor2d_add0.y0_0_addr0; - tensor2d_add0.y0_0_read_data = y.read_data; - y.addr1 = tensor2d_add0.y0_0_addr1; - x1.addr0 = tensor2d_add0.x10_0_addr0; - x1.addr1 = tensor2d_add0.x10_0_addr1; - x1.write_data = tensor2d_add0.x10_0_write_data; - x1.write_en = tensor2d_add0.x10_0_write_en; - tensor2d_add0.x10_0_done = x1.done; - tensor2d_add0.go = 1'd1; - run_tensor2d_add[done] = tensor2d_add0.done ? 1'd1; + group run_add { + x.addr0 = add0.x0_0_addr0; + add0.x0_0_read_data = x.read_data; + x.addr1 = add0.x0_0_addr1; + y.addr0 = add0.y0_0_addr0; + add0.y0_0_read_data = y.read_data; + y.addr1 = add0.y0_0_addr1; + x1.addr0 = add0.x10_0_addr0; + x1.addr1 = add0.x10_0_addr1; + x1.write_data = add0.x10_0_write_data; + x1.write_en = add0.x10_0_write_en; + add0.x10_0_done = x1.done; + add0.go = 1'd1; + run_add[done] = add0.done ? 
1'd1; } } control { seq { - run_tensor2d_add; + run_add; } } } diff --git a/frontends/relay-futil/tests/tensor3d_divide.expect b/frontends/relay-futil/tests/tensor3d_divide.expect index 10eb243cc1..5058296dd8 100644 --- a/frontends/relay-futil/tests/tensor3d_divide.expect +++ b/frontends/relay-futil/tests/tensor3d_divide.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component tensor3d_divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { +component divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { cells { add0 = prim std_add(3); add1 = prim std_add(3); @@ -146,31 +146,31 @@ component main () -> () { x1 = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); x = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); y = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); - tensor3d_divide0 = tensor3d_divide; + divide0 = divide; } wires { - group run_tensor3d_divide { - x.addr0 = tensor3d_divide0.x0_0_0_addr0; - tensor3d_divide0.x0_0_0_read_data = x.read_data; - x.addr1 = tensor3d_divide0.x0_0_0_addr1; - x.addr2 = tensor3d_divide0.x0_0_0_addr2; - y.addr0 = tensor3d_divide0.y0_0_0_addr0; - tensor3d_divide0.y0_0_0_read_data = y.read_data; - y.addr1 = 
tensor3d_divide0.y0_0_0_addr1; - y.addr2 = tensor3d_divide0.y0_0_0_addr2; - x1.addr0 = tensor3d_divide0.x10_0_0_addr0; - x1.addr1 = tensor3d_divide0.x10_0_0_addr1; - x1.addr2 = tensor3d_divide0.x10_0_0_addr2; - x1.write_data = tensor3d_divide0.x10_0_0_write_data; - x1.write_en = tensor3d_divide0.x10_0_0_write_en; - tensor3d_divide0.x10_0_0_done = x1.done; - tensor3d_divide0.go = 1'd1; - run_tensor3d_divide[done] = tensor3d_divide0.done ? 1'd1; + group run_divide { + x.addr0 = divide0.x0_0_0_addr0; + divide0.x0_0_0_read_data = x.read_data; + x.addr1 = divide0.x0_0_0_addr1; + x.addr2 = divide0.x0_0_0_addr2; + y.addr0 = divide0.y0_0_0_addr0; + divide0.y0_0_0_read_data = y.read_data; + y.addr1 = divide0.y0_0_0_addr1; + y.addr2 = divide0.y0_0_0_addr2; + x1.addr0 = divide0.x10_0_0_addr0; + x1.addr1 = divide0.x10_0_0_addr1; + x1.addr2 = divide0.x10_0_0_addr2; + x1.write_data = divide0.x10_0_0_write_data; + x1.write_en = divide0.x10_0_0_write_en; + divide0.x10_0_0_done = x1.done; + divide0.go = 1'd1; + run_divide[done] = divide0.done ? 1'd1; } } control { seq { - run_tensor3d_divide; + run_divide; } } } From 8614cc6f249d0053082ea3cc2feec49ed952abde Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 20:32:28 -0500 Subject: [PATCH 21/75] Fix dahlia name. --- frontends/relay-futil/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 9f97f7ac11..2d48d270e0 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -64,7 +64,7 @@ def produce_dahlia_name(self, name, type): PrimitiveType.Memory3D: '_0_0', PrimitiveType.Memory4D: '_0_0_0'} dahlia_name = self.id(name) assert type in DahliaNameMapping, f'{name} with {type} is not supported yet.' 
- return DahliaNameMapping[type] + return dahlia_name + DahliaNameMapping[type] def get_dahlia_declaration(self, function_name, cells, args): """ From 6657d2f763d91fa09893340b08b70c6a3ebeb863 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 20:48:52 -0500 Subject: [PATCH 22/75] Add axis=1. --- frontends/relay-futil/compiler.py | 3 ++- frontends/relay-futil/tests/bias_add.relay | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 2d48d270e0..cf7af47ab6 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -103,7 +103,8 @@ def visit_let(self, let): def visit_constant(self, const): type, shape = const.data.dtype, const.data.shape - name, data, data_type = self.id("const"), [get_bitwidth(type), int(const.data.asnumpy())], get_type(type) + name, data = self.id("const"), [get_bitwidth(type), int(const.data.asnumpy())] + data_type = get_memory_parameters(type) return FCell(primitive=FPrimitive(name=name, data=data, data_type=data_type, type=PrimitiveType.Constant)) def visit_call(self, call): diff --git a/frontends/relay-futil/tests/bias_add.relay b/frontends/relay-futil/tests/bias_add.relay index 6b90020ed3..f71e872e09 100644 --- a/frontends/relay-futil/tests/bias_add.relay +++ b/frontends/relay-futil/tests/bias_add.relay @@ -1,6 +1,6 @@ v0.0.4 fn (%x: Tensor[(1, 64), float32], %bias: Tensor[(64), float32]) { - let %x1: Tensor[(1, 64), float32] = nn.bias_add(%x, %bias); + let %x1: Tensor[(1, 64), float32] = nn.bias_add(%x, %bias, axis=1); %x1 } From b4a373ae9acf4a38c5702f73d67a1112d940a065 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 20:51:22 -0500 Subject: [PATCH 23/75] Add does not work. 
--- frontends/relay-futil/tests/expand_dims.expect | 1 + 1 file changed, 1 insertion(+) diff --git a/frontends/relay-futil/tests/expand_dims.expect b/frontends/relay-futil/tests/expand_dims.expect index e69de29bb2..7a959bb71d 100644 --- a/frontends/relay-futil/tests/expand_dims.expect +++ b/frontends/relay-futil/tests/expand_dims.expect @@ -0,0 +1 @@ +// Does not work. \ No newline at end of file From 21a4921ffe79bcfebe18353bebee183cb1776fa8 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 20 Nov 2020 21:14:51 -0500 Subject: [PATCH 24/75] Add transforms. --- frontends/relay-futil/compiler.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index cf7af47ab6..0f37c0142d 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -1,3 +1,4 @@ +import tvm from tvm import relay, ir from tvm.relay.expr_functor import ExprFunctor from tvm.relay.function import Function @@ -126,17 +127,22 @@ def visit_function(self, function): return pp_component(self.main) -def infer_type(expr: Function) -> Function: - infer_types_pass = relay.transform.InferType() +def relay_transforms(expr: Function) -> Function: + """https://tvm.apache.org/docs/api/python/relay/transform.html""" + transform = tvm.transform.Sequential([ + relay.transform.SimplifyExpr(), + relay.transform.SimplifyInference(), + relay.transform.InferType() + ]) mod = ir.IRModule() mod['main'] = expr - mod = infer_types_pass(mod) + mod = transform(mod) return mod['main'] def compile(program) -> str: """Translate a Relay function to a FuTIL program (as a string).""" - program = infer_type(program) + program = relay_transforms(program) visitor = Relay2Futil() PREAMBLE = """import "primitives/std.lib";""" From 40b93920f8fac6302b186e12ff0bd9efcb114622 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 08:43:42 -0500 Subject: [PATCH 25/75] Add attributes, bias_add along different 
axis. --- frontends/relay-futil/compiler.py | 9 ++--- frontends/relay-futil/dahlia_functions.py | 24 ++++++++----- frontends/relay-futil/futil_ast.py | 2 ++ .../relay-futil/tests/data/bias_add.relay | 2 +- .../relay-futil/tests/data/bias_add2.expect | 34 +++++++++++++++++++ .../relay-futil/tests/data/bias_add2.relay | 5 +++ .../tests/data/bias_add2.relay.data | 14 ++++++++ frontends/relay-futil/utilities.py | 14 ++++---- 8 files changed, 84 insertions(+), 20 deletions(-) create mode 100644 frontends/relay-futil/tests/data/bias_add2.expect create mode 100644 frontends/relay-futil/tests/data/bias_add2.relay create mode 100644 frontends/relay-futil/tests/data/bias_add2.relay.data diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 0f37c0142d..d8afe5474b 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -67,7 +67,7 @@ def produce_dahlia_name(self, name, type): assert type in DahliaNameMapping, f'{name} with {type} is not supported yet.' return dahlia_name + DahliaNameMapping[type] - def get_dahlia_declaration(self, function_name, cells, args): + def get_dahlia_declaration(self, function_name, cells, args, attrs): """ Returns the corresponding name, Dahlia function type, and op (if it is a binary op, otherwise None). If the function type isn't supported, fails with an assertion. @@ -82,8 +82,8 @@ def get_dahlia_declaration(self, function_name, cells, args): name = function.__name__ else: assert False, f'{function_name} with type {input_type} is not supported.' 
- return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), op=op, inputs=args, - function=function) + return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), + op=op, inputs=args, attributes=attrs, function=function) def visit_var(self, var): name = self.relay_id(var.name_hint) @@ -109,12 +109,13 @@ def visit_constant(self, const): return FCell(primitive=FPrimitive(name=name, data=data, data_type=data_type, type=PrimitiveType.Constant)) def visit_call(self, call): + attributes = call.attrs cells, args = [], [] for arg in call.args: argument = self.visit(arg) cells.append(argument) args.append(argument) - cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, args))) + cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, args, call.attrs))) return cells def visit_function(self, function): diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 50dd7100d4..c2c69c61bb 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -136,7 +136,7 @@ def batch_flatten(declaration): def bias_add(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" - # Assumes default value axis=1 is passed in. 
+ axis = declaration.attributes.get_int("axis") data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth = data.data[0] size0, size1, index_size0, index_size1 = data.data[1], data.data[2], data.data[3], data.data[4] @@ -144,13 +144,21 @@ def bias_add(declaration): program = f""" decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}]; decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}]; - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - {res.name}[i][j] := {data.name}[i][j] + {bias.name}[j]; - }} - }} - """ + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}];""" + if axis == 1: + program += f""" + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + {res.name}[i][j] := {data.name}[i][j] + {bias.name}[j]; + }} + }}""" + elif axis == 0: + program += f""" + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + {res.name}[i][j] := {data.name}[i][j] + {bias.name}[i]; + }} + }}""" return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 868e6ffbcf..9432a62a4d 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -1,3 +1,4 @@ +import tvm from dataclasses import dataclass from typing import List, Dict from types import FunctionType @@ -142,6 +143,7 @@ class DahliaDeclaration: op: str = None inputs: List[Cell] = None output: Cell = None + attributes: tvm.ir.Attrs = None function: FunctionType = None program: str = None diff --git a/frontends/relay-futil/tests/data/bias_add.relay b/frontends/relay-futil/tests/data/bias_add.relay index 4a1c58a64a..1f9b35120c 100644 --- a/frontends/relay-futil/tests/data/bias_add.relay +++ 
b/frontends/relay-futil/tests/data/bias_add.relay @@ -1,5 +1,5 @@ v0.0.4 fn (%x: Tensor[(2, 4), float32], %bias: Tensor[(4), float32]) { - let %x1: Tensor[(2, 4), float32] = nn.bias_add(%x, %bias); + let %x1: Tensor[(2, 4), float32] = nn.bias_add(%x, %bias, axis=1); %x1 } \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/bias_add2.expect b/frontends/relay-futil/tests/data/bias_add2.expect new file mode 100644 index 0000000000..1d6ef587cb --- /dev/null +++ b/frontends/relay-futil/tests/data/bias_add2.expect @@ -0,0 +1,34 @@ +{ + "bias": [ + 42, + 5 + ], + "x": [ + [ + 0, + 0, + 0, + 0 + ], + [ + 0, + 0, + 0, + 0 + ] + ], + "x1": [ + [ + 42, + 42, + 42, + 42 + ], + [ + 5, + 5, + 5, + 5 + ] + ] +} diff --git a/frontends/relay-futil/tests/data/bias_add2.relay b/frontends/relay-futil/tests/data/bias_add2.relay new file mode 100644 index 0000000000..302ede8c97 --- /dev/null +++ b/frontends/relay-futil/tests/data/bias_add2.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(2, 4), float32], %bias: Tensor[(2), float32]) { + let %x1: Tensor[(2, 4), float32] = nn.bias_add(%x, %bias, axis=0); + %x1 +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/bias_add2.relay.data b/frontends/relay-futil/tests/data/bias_add2.relay.data new file mode 100644 index 0000000000..b96b5aa3e7 --- /dev/null +++ b/frontends/relay-futil/tests/data/bias_add2.relay.data @@ -0,0 +1,14 @@ +{ + "x": { + "data": [[0,0,0,0], [0,0,0,0]], + "bitwidth": 32 + }, + "bias": { + "data": [42,5], + "bitwidth": 32 + }, + "x1": { + "data": [[0,0,0,0], [0,0,0,0]], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 39a4591af1..faaccf1fe7 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -2,6 +2,10 @@ from itertools import chain import math +# Mapping from the tensor dimensions to the corresponding FuTIL memory type. 
+TensorToMemoryDimensionMapping = {1: PrimitiveType.Memory1D, 2: PrimitiveType.Memory2D, + 3: PrimitiveType.Memory3D, 4: PrimitiveType.Memory4D} + def flatten(l): ''' @@ -60,15 +64,11 @@ def get_memory_parameters(type): string_dimensions = t[t.find("(") + 1:t.find(")")] tensor_dimensions = list(map(int, string_dimensions.split(','))) - data = [get_bitwidth(string_type)] + data, num_dimensions = [get_bitwidth(string_type)], len(tensor_dimensions) + assert num_dimensions in TensorToMemoryDimensionMapping, f'{num_dimensions} dimensions is not supported.' for dimension in tensor_dimensions: data.append(dimension) # Size. for dimension in tensor_dimensions: data.append(int(math.log2(dimension) + 1)) # Index size. - - if len(tensor_dimensions) == 1: primitive_type = PrimitiveType.Memory1D - if len(tensor_dimensions) == 2: primitive_type = PrimitiveType.Memory2D - if len(tensor_dimensions) == 3: primitive_type = PrimitiveType.Memory3D - if len(tensor_dimensions) == 4: primitive_type = PrimitiveType.Memory4D - return data, primitive_type, data_type + return data, TensorToMemoryDimensionMapping[num_dimensions], data_type def build_main_controls(c: FComponent): From 3555c6241d93913a9139489d0f979e8f46edb388 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 11:17:58 -0500 Subject: [PATCH 26/75] Add bias add. --- frontends/relay-futil/dahlia_functions.py | 62 ++++++--- frontends/relay-futil/tests/bias_add.expect | 138 ++++++++++++++------ frontends/relay-futil/tests/bias_add.relay | 4 +- 3 files changed, 143 insertions(+), 61 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index c2c69c61bb..00eb769fa4 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -31,7 +31,6 @@ def lower_dahlia_program(prog, component_name): (done: 1, X0_addr0: 2, X0_write_data: 32, X0_write_en: 1, X0_clk: 1) { ... 
} - ''' program_string = '\n'.join(prog.splitlines()) with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: @@ -139,26 +138,47 @@ def bias_add(declaration): axis = declaration.attributes.get_int("axis") data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth = data.data[0] - size0, size1, index_size0, index_size1 = data.data[1], data.data[2], data.data[3], data.data[4] - bias_size, bias_index_size = bias.data[1], bias.data[2] - program = f""" - decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}]; - decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}];""" - if axis == 1: - program += f""" - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - {res.name}[i][j] := {data.name}[i][j] + {bias.name}[j]; - }} - }}""" - elif axis == 0: - program += f""" - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - {res.name}[i][j] := {data.name}[i][j] + {bias.name}[i]; - }} - }}""" + if data.type == PrimitiveType.Memory2D: + size0, size1, index_size0, index_size1 = data.data[1], data.data[2], data.data[3], data.data[4] + bias_size, bias_index_size = bias.data[1], bias.data[2] + program = f""" + decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}]; + decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}];""" + if axis == 1: + program += f""" + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + {res.name}[i][j] := {data.name}[i][j] + {bias.name}[j]; + }} + }}""" + elif axis == 0: + program += f""" + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + {res.name}[i][j] := {data.name}[i][j] + {bias.name}[i]; + }} + }}""" + 
elif data.type == PrimitiveType.Memory4D: + bitwidth, size0, size1, size2, size3 = data.data[0], data.data[1], data.data[2], data.data[3], data.data[4] + index_size0, index_size1, index_size2, index_size3 = data.data[5], data.data[6], data.data[7], data.data[8] + bias_size, bias_index_size = bias.data[1], bias.data[2] + program = f""" + decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}];""" + if axis == 1: + program += f""" + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let k: ubit<{index_size2}> = 0..{size2}) {{ + for (let l: ubit<{index_size3}> = 0..{size3}) {{ + {res.name}[i][j][k][l] := {data.name}[i][j][k][l] + {bias.name}[j]; + }} + }} + }} + }}""" + return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect index 08566e1f14..562f55dfe0 100644 --- a/frontends/relay-futil/tests/bias_add.expect +++ b/frontends/relay-futil/tests/bias_add.expect @@ -1,21 +1,33 @@ import "primitives/std.lib"; -component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 7, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 7, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_0_0_addr0: 1, x0_0_0_0_addr1: 7, x0_0_0_0_addr2: 10, x0_0_0_0_addr3: 9, x0_0_0_0_write_data: 32, 
x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 1, x10_0_0_0_addr1: 7, x10_0_0_0_addr2: 10, x10_0_0_0_addr3: 9, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(7); - add2 = prim std_add(1); + add1 = prim std_add(9); + add2 = prim std_add(10); + add3 = prim std_add(7); + add4 = prim std_add(1); bias_read0_0 = prim std_reg(32); const0 = prim std_const(1, 0); const1 = prim std_const(1, 0); + const10 = prim std_const(7, 1); + const11 = prim std_const(1, 1); const2 = prim std_const(7, 0); const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(1, 1); + const4 = prim std_const(10, 0); + const5 = prim std_const(10, 511); + const6 = prim std_const(9, 0); + const7 = prim std_const(9, 255); + const8 = prim std_const(9, 1); + const9 = prim std_const(10, 1); i0 = prim std_reg(1); j0 = prim std_reg(7); + k0 = prim std_reg(10); + l0 = prim std_reg(9); le0 = prim std_le(1); le1 = prim std_le(7); + le2 = prim std_le(10); + le3 = prim std_le(9); x_read0_0 = prim std_reg(32); } wires { @@ -29,6 +41,16 @@ component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_ le1.left = j0.out; le1.right = const3.out; } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = l0.out; + le3.right = const7.out; + } group let0<"static"=1> { i0.in = const0.out; i0.write_en = 1'd1; @@ -39,11 +61,23 @@ component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_ j0.write_en = 1'd1; let1[done] = j0.done; } + group let2<"static"=1> { + k0.in = const4.out; + k0.write_en = 1'd1; + let2[done] = k0.done; + } + group let3<"static"=1> { + l0.in = const6.out; + l0.write_en = 1'd1; + let3[done] = l0.done; + } group upd0<"static"=1> { x_read0_0.write_en = 1'd1; - x0_0_addr1 = j0.out; - x0_0_addr0 = i0.out; - x_read0_0.in = 1'd1 ? 
x0_0_read_data; + x0_0_0_0_addr3 = l0.out; + x0_0_0_0_addr2 = k0.out; + x0_0_0_0_addr1 = j0.out; + x0_0_0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_0_0_read_data; upd0[done] = x_read0_0.done ? 1'd1; } group upd1<"static"=1> { @@ -53,27 +87,43 @@ component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_ upd1[done] = bias_read0_0.done ? 1'd1; } group upd2<"static"=1> { - x10_0_addr1 = j0.out; - x10_0_addr0 = i0.out; - x10_0_write_en = 1'd1; + x10_0_0_0_addr3 = l0.out; + x10_0_0_0_addr2 = k0.out; + x10_0_0_0_addr1 = j0.out; + x10_0_0_0_addr0 = i0.out; + x10_0_0_0_write_en = 1'd1; add0.left = x_read0_0.out; add0.right = bias_read0_0.out; - x10_0_write_data = 1'd1 ? add0.out; - upd2[done] = x10_0_done ? 1'd1; + x10_0_0_0_write_data = 1'd1 ? add0.out; + upd2[done] = x10_0_0_0_done ? 1'd1; } group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; + l0.write_en = 1'd1; + add1.left = l0.out; + add1.right = const8.out; + l0.in = 1'd1 ? add1.out; + upd3[done] = l0.done ? 1'd1; } group upd4<"static"=1> { + k0.write_en = 1'd1; + add2.left = k0.out; + add2.right = const9.out; + k0.in = 1'd1 ? add2.out; + upd4[done] = k0.done ? 1'd1; + } + group upd5<"static"=1> { + j0.write_en = 1'd1; + add3.left = j0.out; + add3.right = const10.out; + j0.in = 1'd1 ? add3.out; + upd5[done] = j0.done ? 1'd1; + } + group upd6<"static"=1> { i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; + add4.left = i0.out; + add4.right = const11.out; + i0.in = 1'd1 ? add4.out; + upd6[done] = i0.done ? 
1'd1; } } @@ -85,15 +135,27 @@ component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_ let1; while le1.out with cond1 { seq { - par { - upd0; - upd1; + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } } - upd2; - upd3; + upd5; } } - upd4; + upd6; } } } @@ -102,23 +164,23 @@ component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_read_ component main () -> () { cells { - x1 = prim std_mem_d2(32, 1, 64, 1, 7); - x = prim std_mem_d2(32, 1, 64, 1, 7); + x1 = prim std_mem_d4(32, 1, 64, 512, 256, 256, 1, 7, 10); + x = prim std_mem_d4(32, 1, 64, 512, 256, 256, 1, 7, 10); bias = prim std_mem_d1(32, 64, 7); bias_add0 = bias_add; } wires { group run_bias_add { - x.addr0 = bias_add0.x0_0_addr0; - bias_add0.x0_0_read_data = x.read_data; - x.addr1 = bias_add0.x0_0_addr1; + x.addr0 = bias_add0.x0_0_0_0_addr0; + bias_add0.x0_0_0_0_read_data = x.read_data; + x.addr1 = bias_add0.x0_0_0_0_addr1; + x.addr2 = bias_add0.x0_0_0_0_addr2; bias.addr0 = bias_add0.bias0_addr0; bias_add0.bias0_read_data = bias.read_data; - x1.addr0 = bias_add0.x10_0_addr0; - x1.addr1 = bias_add0.x10_0_addr1; - x1.write_data = bias_add0.x10_0_write_data; - x1.write_en = bias_add0.x10_0_write_en; - bias_add0.x10_0_done = x1.done; + x1.addr0 = bias_add0.x10_0_0_0_addr0; + x1.write_data = bias_add0.x10_0_0_0_write_data; + x1.write_en = bias_add0.x10_0_0_0_write_en; + bias_add0.x10_0_0_0_done = x1.done; bias_add0.go = 1'd1; run_bias_add[done] = bias_add0.done ? 
1'd1; } diff --git a/frontends/relay-futil/tests/bias_add.relay b/frontends/relay-futil/tests/bias_add.relay index f71e872e09..84d9c6d54e 100644 --- a/frontends/relay-futil/tests/bias_add.relay +++ b/frontends/relay-futil/tests/bias_add.relay @@ -1,6 +1,6 @@ v0.0.4 -fn (%x: Tensor[(1, 64), float32], %bias: Tensor[(64), float32]) { - let %x1: Tensor[(1, 64), float32] = nn.bias_add(%x, %bias, axis=1); +fn (%x: Tensor[(1, 64, 512, 256), float32], %bias: Tensor[(64), float32]) { + let %x1: Tensor[(1, 64, 512, 256), float32] = nn.bias_add(%x, %bias, axis=1); %x1 } From a959fa5992de374edc0a414c7bfc8945c3d8336c Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 13:00:13 -0500 Subject: [PATCH 27/75] Fix name ordering for visit let. --- frontends/relay-futil/compiler.py | 3 ++- frontends/relay-futil/dahlia_functions.py | 14 ++++++++------ frontends/relay-futil/example.py | 2 +- frontends/relay-futil/tests/expand_dims.expect | 1 - frontends/relay-futil/tests/expand_dims.relay | 6 ------ 5 files changed, 11 insertions(+), 15 deletions(-) delete mode 100644 frontends/relay-futil/tests/expand_dims.expect delete mode 100644 frontends/relay-futil/tests/expand_dims.relay diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index d8afe5474b..77e23e4a6e 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -95,11 +95,12 @@ def visit_var(self, var): primitive=FPrimitive(name=name, data=data, data_type=data_type, type=type)) def visit_let(self, let): - output, body, values = self.visit(let.var), self.visit(let.body), self.visit(let.value) + values, output = self.visit(let.value), self.visit(let.var) for value in values: if not value.is_dahlia_declaration(): continue value.dahlia_declaration.output = output value.dahlia_declaration.invoke() + body = self.visit(let.body) return [body, values] def visit_constant(self, const): diff --git a/frontends/relay-futil/dahlia_functions.py 
b/frontends/relay-futil/dahlia_functions.py index 00eb769fa4..a197bb93ec 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -258,17 +258,19 @@ def negative(declaration): def expand_dims(declaration): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" - op1, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] + axis, num_newaxis = declaration.attributes.get_int("axis"), declaration.attributes.get_int("num_newaxis") + data, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, size, index_size = data.data[0], data.data[1], data.data[2] size0, size1, size2 = res.data[1], res.data[2], res.data[3] index_size0, index_size1, index_size2 = res.data[4], res.data[5], res.data[6] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; + program = f"""decl {data.name}: {data.data_type}<{bitwidth}>[{size}];""" + if axis == 1 and num_newaxis == 2: + program += f""" decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i][0][0] := {op1.name}[i]; + {res.name}[i][0][0] := {data.name}[i]; }} - """ + """ return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 779493b1a4..f7e8526627 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -17,7 +17,7 @@ def tensor_subtract(): def expand_dims(): - x = relay.var('x', shape=[4], dtype='int32') + x = relay.var('x', shape=[512], dtype='int32') return relay.Function([x], relay.expand_dims(x, axis=1, num_newaxis=2)) diff --git a/frontends/relay-futil/tests/expand_dims.expect b/frontends/relay-futil/tests/expand_dims.expect deleted file mode 100644 index 7a959bb71d..0000000000 --- a/frontends/relay-futil/tests/expand_dims.expect 
+++ /dev/null @@ -1 +0,0 @@ -// Does not work. \ No newline at end of file diff --git a/frontends/relay-futil/tests/expand_dims.relay b/frontends/relay-futil/tests/expand_dims.relay deleted file mode 100644 index 47ae5ce31a..0000000000 --- a/frontends/relay-futil/tests/expand_dims.relay +++ /dev/null @@ -1,6 +0,0 @@ -v0.0.4 -fn (%x: Tensor[(4), int32]) { - let %x1 = expand_dims(%x, axis=1, num_newaxis=2); - %x1 -} - From 2c8b72ec6b3502b20a39acfbfaa62d45588afbd1 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 13:22:31 -0500 Subject: [PATCH 28/75] Cleanup! --- frontends/relay-futil/compiler.py | 6 +++--- frontends/relay-futil/dahlia_functions.py | 5 +++-- frontends/relay-futil/example.py | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 77e23e4a6e..1459bdef64 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -136,7 +136,7 @@ def relay_transforms(expr: Function) -> Function: relay.transform.SimplifyInference(), relay.transform.InferType() ]) - mod = ir.IRModule() + mod = ir.IRModule.from_expr(expr) mod['main'] = expr mod = transform(mod) return mod['main'] @@ -157,5 +157,5 @@ def compile(program) -> str: if __name__ == '__main__': import sys - relay_func = relay.fromtext(sys.stdin.read()) - print(compile(relay_func)) + relay_function = relay.fromtext(sys.stdin.read()) + print(compile(relay_function)) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index a197bb93ec..029cde5f4b 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -263,14 +263,15 @@ def expand_dims(declaration): bitwidth, size, index_size = data.data[0], data.data[1], data.data[2] size0, size1, size2 = res.data[1], res.data[2], res.data[3] index_size0, index_size1, index_size2 = res.data[4], res.data[5], res.data[6] - program = f"""decl {data.name}: 
{data.data_type}<{bitwidth}>[{size}];""" if axis == 1 and num_newaxis == 2: - program += f""" + program = f""" + decl {data.name}: {data.data_type}<{bitwidth}>[{size}]; decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; for (let i: ubit<{index_size}> = 0..{size}) {{ {res.name}[i][0][0] := {data.name}[i]; }} """ + print(program) return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index f7e8526627..95c8dd5cca 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -87,13 +87,13 @@ def run_example(): mod_opt = tvm.IRModule.from_expr(func) mod_opt = seq(mod_opt) - func = mod_opt['main'] + relay_IR = mod_opt['main'] if '-r' in input: # Dump the Relay representation (for educational purposes). - print(func) + print(relay_IR) else: # Compile the function and print the FuTIL. - print(compile(func)) + print(compile(relay_IR)) if __name__ == '__main__': From 77f01c488a263a6b1f7e36e47139f4859d84a678 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 13:24:07 -0500 Subject: [PATCH 29/75] More cleanup. 
--- frontends/relay-futil/compiler.py | 4 ++-- frontends/relay-futil/dahlia_functions.py | 1 - frontends/relay-futil/example.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 1459bdef64..1857b7a0ad 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -142,7 +142,7 @@ def relay_transforms(expr: Function) -> Function: return mod['main'] -def compile(program) -> str: +def lower_to_futil(program) -> str: """Translate a Relay function to a FuTIL program (as a string).""" program = relay_transforms(program) visitor = Relay2Futil() @@ -158,4 +158,4 @@ def compile(program) -> str: import sys relay_function = relay.fromtext(sys.stdin.read()) - print(compile(relay_function)) + print(lower_to_futil(relay_function)) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 029cde5f4b..0ddb778b16 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -271,7 +271,6 @@ def expand_dims(declaration): {res.name}[i][0][0] := {data.name}[i]; }} """ - print(program) return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 95c8dd5cca..c09c4f3c36 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -93,7 +93,7 @@ def run_example(): print(relay_IR) else: # Compile the function and print the FuTIL. - print(compile(relay_IR)) + print(lower_to_futil(relay_IR)) if __name__ == '__main__': From e5aa60d3e4a2d1da02dd8576535781064a843817 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 14:22:34 -0500 Subject: [PATCH 30/75] Add element-wise with a single value. 
--- frontends/relay-futil/compiler.py | 5 ++-- frontends/relay-futil/dahlia_functions.py | 28 +++++++++++++------ frontends/relay-futil/example.py | 7 ++++- frontends/relay-futil/pretty_print.py | 5 ++-- frontends/relay-futil/tests/add.relay | 5 ++-- frontends/relay-futil/tests/data/sub.expect | 18 ++++++++---- frontends/relay-futil/tests/data/sub.relay | 7 +++-- .../relay-futil/tests/data/sub.relay.data | 12 ++++---- 8 files changed, 56 insertions(+), 31 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 1857b7a0ad..3a60bced51 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -96,14 +96,15 @@ def visit_var(self, var): def visit_let(self, let): values, output = self.visit(let.value), self.visit(let.var) + if not isinstance(values, list): return [self.visit(let.body), values] for value in values: if not value.is_dahlia_declaration(): continue value.dahlia_declaration.output = output value.dahlia_declaration.invoke() - body = self.visit(let.body) - return [body, values] + return [self.visit(let.body), values] def visit_constant(self, const): + # Note: We're currently treating constants defined in a `let` statement in Relay IR as 1D Memory. 
type, shape = const.data.dtype, const.data.shape name, data = self.id("const"), [get_bitwidth(type), int(const.data.asnumpy())] data_type = get_memory_parameters(type) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 0ddb778b16..ba4c903aee 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -47,14 +47,26 @@ def lower_dahlia_program(prog, component_name): def tensor1d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; - for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; - }}""" + bitwidth, size, index_size, op2_size = op1.data[0], op1.data[1], op1.data[2], op2.data[1] + if op2_size != size: + # Element-wise operation using a single value, e.g. 
+ # let %a = 42; + # let %c = add(%b: Tensor[(512)], %a); + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{op2_size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; + for (let i: ubit<{index_size}> = 0..{size}) {{ + {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[0]; + }}""" + else: + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; + for (let i: ubit<{index_size}> = 0..{size}) {{ + {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; + }}""" return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index c09c4f3c36..116f91b887 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -10,6 +10,11 @@ def add(): return relay.Function([x, y], relay.add(x, y)) +def add2(): + x = relay.var('x', shape=(), dtype="int32") + return relay.Function([x], relay.add(x, relay.const(42))) + + def tensor_subtract(): x = relay.var("x", relay.TensorType((2, 4), "int32")) y = relay.var("y", relay.TensorType((2, 4), "int32")) @@ -56,7 +61,7 @@ def vgg_net(): batch_norm=True) -ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] +ALL_FUNCS = [add, add2, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 6c65dea0dc..8a2b13a1d9 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -73,10 +73,9 @@ def pp_component(component: FComponent): def pp_cell(cell: FCell): if cell.is_primitive(): data = cell.primitive.data - data_type = 
cell.primitive.data_type - if data_type == 'ubit' or data_type == 'bit': bitwidth = str(data[0]) + data_type, bitwidth = cell.primitive.data_type, data[0] # `fix` / `ufix` will have bitwidth form: . We only want TotalWidth. - if data_type == 'ufix' or data_type == 'fix': bitwidth = str(data[0]).split(',')[0] + if data_type == 'ufix' or data_type == 'fix': bitwidth = str(bitwidth).split(',')[0] if cell.primitive.type == PrimitiveType.Register: return f'{cell.primitive.name} = prim std_reg({bitwidth});' if cell.primitive.type == PrimitiveType.Constant: diff --git a/frontends/relay-futil/tests/add.relay b/frontends/relay-futil/tests/add.relay index 378c62b4fd..51e09388ed 100644 --- a/frontends/relay-futil/tests/add.relay +++ b/frontends/relay-futil/tests/add.relay @@ -1,5 +1,6 @@ v0.0.4 -fn (%x: int32, %y: int32) { - let %z = add(%x, %y); +fn (%x: Tensor[(512), int32]) { + let %l: int32 = 42; + let %z = add(%x, %l); %z } diff --git a/frontends/relay-futil/tests/data/sub.expect b/frontends/relay-futil/tests/data/sub.expect index e313c7824d..523f8c2430 100644 --- a/frontends/relay-futil/tests/data/sub.expect +++ b/frontends/relay-futil/tests/data/sub.expect @@ -1,11 +1,17 @@ { - "a": [ - 49 + "x": [ + 16, + 16, + 16, + 16 ], - "b": [ - 7 + "x1": [ + 8, + 8, + 8, + 8 ], - "c": [ - 42 + "y": [ + 8 ] } diff --git a/frontends/relay-futil/tests/data/sub.relay b/frontends/relay-futil/tests/data/sub.relay index 0e0df9fb24..f73c4da85d 100644 --- a/frontends/relay-futil/tests/data/sub.relay +++ b/frontends/relay-futil/tests/data/sub.relay @@ -1,5 +1,6 @@ v0.0.4 -fn (%a: int32, %b: int32) { - let %c = subtract(%a, %b); - %c +fn (%x: Tensor[(4), int32]) { + let %y = 8; + let %x1 = subtract(%x, %y); + %x1 } diff --git a/frontends/relay-futil/tests/data/sub.relay.data b/frontends/relay-futil/tests/data/sub.relay.data index 219d0fa867..6f7ef00e0f 100644 --- a/frontends/relay-futil/tests/data/sub.relay.data +++ b/frontends/relay-futil/tests/data/sub.relay.data @@ -1,14 +1,14 @@ { - 
"a": { - "data": [49], + "x": { + "data": [16, 16, 16, 16], "bitwidth": 32 }, - "b": { - "data": [7], + "y": { + "data": [8], "bitwidth": 32 }, - "c": { - "data": [0], + "x1": { + "data": [0, 0, 0, 0], "bitwidth": 32 } } \ No newline at end of file From 5a317313e10dc6163f3573fd6d8b566458c9e457 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 16:53:08 -0500 Subject: [PATCH 31/75] Revert add. --- frontends/relay-futil/tests/add.relay | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/frontends/relay-futil/tests/add.relay b/frontends/relay-futil/tests/add.relay index 51e09388ed..378c62b4fd 100644 --- a/frontends/relay-futil/tests/add.relay +++ b/frontends/relay-futil/tests/add.relay @@ -1,6 +1,5 @@ v0.0.4 -fn (%x: Tensor[(512), int32]) { - let %l: int32 = 42; - let %z = add(%x, %l); +fn (%x: int32, %y: int32) { + let %z = add(%x, %y); %z } From 6a9f170fc8aedad4e691369a226327d18395aad5 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 22:04:42 -0500 Subject: [PATCH 32/75] Fix PP for D4. --- frontends/relay-futil/dahlia_functions.py | 59 +++++++++++++++-------- frontends/relay-futil/example.py | 8 +-- frontends/relay-futil/pretty_print.py | 8 +-- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index ba4c903aee..0b42dfa13f 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -48,24 +48,24 @@ def lower_dahlia_program(prog, component_name): def tensor1d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth, size, index_size, op2_size = op1.data[0], op1.data[1], op1.data[2], op2.data[1] - if op2_size != size: - # Element-wise operation using a single value, e.g. 
- # let %a = 42; - # let %c = add(%b: Tensor[(512)], %a); + if size == op2_size: program = f""" decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{op2_size}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size}]; decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[0]; + {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; }}""" else: + # Broadcasting using a single value, e.g. + # let %a = 42; + # let %c = add(%b: Tensor[(512)], %a); program = f""" decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{op2_size}]; decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; + {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[0]; }}""" return lower_dahlia_program(program, declaration.component_name) @@ -107,19 +107,38 @@ def tensor4d_op(declaration): op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth, size0, size1, size2, size3 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] index_size0, index_size1, index_size2, index_size3 = op1.data[5], op1.data[6], op1.data[7], op1.data[8] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let k: ubit<{index_size2}> = 0..{size2}) {{ - for (let l: ubit<{index_size3}> = 0..{size3}) {{ - {res.name}[i][j][k][l] := {op1.name}[i][j][k][l] 
{declaration.op} {op2.name}[i][j][k][l]; + if op1.type == op2.type: + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let k: ubit<{index_size2}> = 0..{size2}) {{ + for (let l: ubit<{index_size3}> = 0..{size3}) {{ + {res.name}[i][j][k][l] := {op1.name}[i][j][k][l] {declaration.op} {op2.name}[i][j][k][l]; + }} + }} }} - }} - }} - }}""" + }}""" + else: # Broadcasting. + op2_size0, op2_size1, op2_size2 = op2.data[1], op2.data[2], op2.data[3] + op2_index_size0, op2_index_size1, op2_index_size2 = op2.data[3], op2.data[5], op2.data[6] + # TODO(cgyurgyik): This is defaulted to: `Tensor(X, Y, 1, 1) op Tensor(Y, 1, 1)` for VGG Net. + # This should be generalized. + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{op2_size0}][{op2_size1}][{op2_size2}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + for (let k: ubit<{index_size2}> = 0..{size2}) {{ + for (let l: ubit<{index_size3}> = 0..{size3}) {{ + {res.name}[i][j][k][l] := {op1.name}[i][j][k][l] {declaration.op} {op2.name}[j][0][0]; + }} + }} + }} + }}""" return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 116f91b887..45e18b387c 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -9,12 +9,6 @@ def add(): y = relay.var('y', shape=(), dtype="int32") return relay.Function([x, y], relay.add(x, y)) - -def add2(): - x = relay.var('x', 
shape=(), dtype="int32") - return relay.Function([x], relay.add(x, relay.const(42))) - - def tensor_subtract(): x = relay.var("x", relay.TensorType((2, 4), "int32")) y = relay.var("y", relay.TensorType((2, 4), "int32")) @@ -61,7 +55,7 @@ def vgg_net(): batch_norm=True) -ALL_FUNCS = [add, add2, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] +ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 8a2b13a1d9..6c5b03dbd5 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -106,10 +106,10 @@ def pp_cell(cell: FCell): size1 = str(data[2]) size2 = str(data[3]) size3 = str(data[4]) - index_size0 = str(data[4]) - index_size1 = str(data[5]) - index_size2 = str(data[6]) - index_size3 = str(data[7]) + index_size0 = str(data[5]) + index_size1 = str(data[6]) + index_size2 = str(data[7]) + index_size3 = str(data[8]) return f'{cell.primitive.name} = prim std_mem_d4({bitwidth}, ' \ f'{size0}, {size1}, {size2}, {size3}, {index_size0}, {index_size1}, {index_size2}, {index_size3});' if cell.primitive.type == PrimitiveType.BinOp: From 655a439a128f1e121c42e3d82fb930e81248e849 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 21 Nov 2020 22:08:11 -0500 Subject: [PATCH 33/75] Fix 4d case. 
--- frontends/relay-futil/tests/bias_add.expect | 4 ++-- frontends/relay-futil/tests/relu.expect | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect index 562f55dfe0..620da35d44 100644 --- a/frontends/relay-futil/tests/bias_add.expect +++ b/frontends/relay-futil/tests/bias_add.expect @@ -164,8 +164,8 @@ component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_0_0_r component main () -> () { cells { - x1 = prim std_mem_d4(32, 1, 64, 512, 256, 256, 1, 7, 10); - x = prim std_mem_d4(32, 1, 64, 512, 256, 256, 1, 7, 10); + x1 = prim std_mem_d4(32, 1, 64, 512, 256, 1, 7, 10, 9); + x = prim std_mem_d4(32, 1, 64, 512, 256, 1, 7, 10, 9); bias = prim std_mem_d1(32, 64, 7); bias_add0 = bias_add; } diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect index a0998dd229..7c2ac6e96a 100644 --- a/frontends/relay-futil/tests/relu.expect +++ b/frontends/relay-futil/tests/relu.expect @@ -192,8 +192,8 @@ component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_ component main () -> () { cells { - x1 = prim std_mem_d4(32, 2, 4, 8, 32, 32, 2, 3, 4); - x = prim std_mem_d4(32, 2, 4, 8, 32, 32, 2, 3, 4); + x1 = prim std_mem_d4(32, 2, 4, 8, 32, 2, 3, 4, 6); + x = prim std_mem_d4(32, 2, 4, 8, 32, 2, 3, 4, 6); relu0 = relu; } wires { From caf01128e753ee8f01ea2f6e773d20bc7007012b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 22 Nov 2020 08:19:25 -0500 Subject: [PATCH 34/75] Remove extra comma. 
--- primitives/std.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/primitives/std.lib b/primitives/std.lib index 7f4d887efa..ecd352640a 100644 --- a/primitives/std.lib +++ b/primitives/std.lib @@ -187,7 +187,7 @@ primitive std_mem_d4[ parameter d0_idx_size = 4, parameter d1_idx_size = 4, parameter d2_idx_size = 4, - parameter d3_idx_size = 4,) + parameter d3_idx_size = 4) (input logic [d0_idx_size-1:0] addr0, input logic [d1_idx_size-1:0] addr1, input logic [d2_idx_size-1:0] addr2, From f0496458224dd37bc32972217e21f7ef945764bc Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 22 Nov 2020 08:35:21 -0500 Subject: [PATCH 35/75] Simplify expr. --- frontends/relay-futil/compiler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 3a60bced51..6026dcfcb2 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -96,11 +96,11 @@ def visit_var(self, var): def visit_let(self, let): values, output = self.visit(let.value), self.visit(let.var) - if not isinstance(values, list): return [self.visit(let.body), values] - for value in values: - if not value.is_dahlia_declaration(): continue - value.dahlia_declaration.output = output - value.dahlia_declaration.invoke() + if isinstance(values, list): + for value in values: + if not value.is_dahlia_declaration(): continue + value.dahlia_declaration.output = output + value.dahlia_declaration.invoke() return [self.visit(let.body), values] def visit_constant(self, const): From 318b6f5d84696bcee05898a5f1b50f7edde144f0 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 22 Nov 2020 08:37:17 -0500 Subject: [PATCH 36/75] Change to dahlia_name. 
--- frontends/relay-futil/compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 6026dcfcb2..125d0d8092 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -54,7 +54,7 @@ def relay_id(self, name): if id_number == 0: return name return name + str(id_number) - def produce_dahlia_name(self, name, type): + def dahlia_name(self, name, type): """ Dahlia uses the following naming scheme for an arbitrary variable 'X': Memory1D: 'X0', 'X1', 'X2', ... @@ -90,7 +90,7 @@ def visit_var(self, var): # Do not add duplicate primitives to main. if self.main.contains_primitive(name): return cell data, type, data_type = get_memory_parameters(var.type_annotation) - dahlia_name = self.produce_dahlia_name(name, type) + dahlia_name = self.dahlia_name(name, type) return FCell(dahlia_name=dahlia_name, primitive=FPrimitive(name=name, data=data, data_type=data_type, type=type)) From 6305a0f5e399c38b3ed41cd2cadce2fa1cade79b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 22 Nov 2020 08:48:20 -0500 Subject: [PATCH 37/75] Clean up PP. 
--- frontends/relay-futil/pretty_print.py | 31 +++++++-------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 6c5b03dbd5..f711e78546 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -14,8 +14,7 @@ def mk_block(decl, contents, indent=2): def pp_component_signature(component: FComponent): inputs = [] - if component.signature == None: - return "", "" + if component.signature == None: return "", "" for input in component.signature.inputs: inputs.append(f'{input.name}: {input.bitwidth}') @@ -74,7 +73,7 @@ def pp_cell(cell: FCell): if cell.is_primitive(): data = cell.primitive.data data_type, bitwidth = cell.primitive.data_type, data[0] - # `fix` / `ufix` will have bitwidth form: . We only want TotalWidth. + # `fix` / `ufix` will have bitwidth in the form: . We only want TotalWidth. if data_type == 'ufix' or data_type == 'fix': bitwidth = str(bitwidth).split(',')[0] if cell.primitive.type == PrimitiveType.Register: return f'{cell.primitive.name} = prim std_reg({bitwidth});' @@ -82,34 +81,20 @@ def pp_cell(cell: FCell): value = str(data[1]) return f'{cell.primitive.name} = prim std_const({bitwidth}, {value});' if cell.primitive.type == PrimitiveType.Memory1D: - size = str(data[1]) - index_size = str(data[2]) + size, index_size = str(data[1]), str(data[2]) return f'{cell.primitive.name} = prim std_mem_d1({bitwidth}, {size}, {index_size});' if cell.primitive.type == PrimitiveType.Memory2D: - size0 = str(data[1]) - size1 = str(data[2]) - index_size0 = str(data[3]) - index_size1 = str(data[4]) + size0, size1, index_size0, index_size1 = str(data[1]), str(data[2]), str(data[3]), str(data[4]) return f'{cell.primitive.name} = prim std_mem_d2({bitwidth}, ' \ f'{size0}, {size1}, {index_size0}, {index_size1});' if cell.primitive.type == PrimitiveType.Memory3D: - size0 = str(data[1]) - size1 = str(data[2]) - size2 = 
str(data[3]) - index_size0 = str(data[4]) - index_size1 = str(data[5]) - index_size2 = str(data[6]) + size0, size1, size2 = str(data[1]), str(data[2]), str(data[3]) + index_size0, index_size1, index_size2 = str(data[4]), str(data[5]), str(data[6]) return f'{cell.primitive.name} = prim std_mem_d3({bitwidth}, ' \ f'{size0}, {size1}, {size2}, {index_size0}, {index_size1}, {index_size2});' if cell.primitive.type == PrimitiveType.Memory4D: - size0 = str(data[1]) - size1 = str(data[2]) - size2 = str(data[3]) - size3 = str(data[4]) - index_size0 = str(data[5]) - index_size1 = str(data[6]) - index_size2 = str(data[7]) - index_size3 = str(data[8]) + size0, size1, size2, size3 = str(data[1]), str(data[2]), str(data[3]), str(data[4]) + index_size0, index_size1, index_size2, index_size3 = str(data[5]), str(data[6]), str(data[7]), str(data[8]) return f'{cell.primitive.name} = prim std_mem_d4({bitwidth}, ' \ f'{size0}, {size1}, {size2}, {size3}, {index_size0}, {index_size1}, {index_size2}, {index_size3});' if cell.primitive.type == PrimitiveType.BinOp: From a99384f54421c4da425d96089a4fdef25709a2d1 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 22 Nov 2020 08:51:22 -0500 Subject: [PATCH 38/75] Place mapping outside of function call. --- frontends/relay-futil/compiler.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 125d0d8092..ae328b4144 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -20,6 +20,11 @@ RelayFunctionCalls = {'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims} +# Mapping between primitive type and associated Dahlia name extension. +# E.g. A 2D memory array named `A` will be lowered to `A_0`. 
+DahliaNameExtension = {PrimitiveType.Memory1D: '', PrimitiveType.Memory2D: '_0', + PrimitiveType.Memory3D: '_0_0', PrimitiveType.Memory4D: '_0_0_0'} + class Relay2Futil(ExprFunctor): """The main compilation visitor.""" @@ -61,11 +66,9 @@ def dahlia_name(self, name, type): Memory2D: 'X0_0', 'X1_0', 'X2_0', ... Memory3D: 'X0_0_0', 'X1_0_0', 'X2_0_0', ... """ - DahliaNameMapping = {PrimitiveType.Memory1D: '', PrimitiveType.Memory2D: '_0', - PrimitiveType.Memory3D: '_0_0', PrimitiveType.Memory4D: '_0_0_0'} dahlia_name = self.id(name) - assert type in DahliaNameMapping, f'{name} with {type} is not supported yet.' - return dahlia_name + DahliaNameMapping[type] + assert type in DahliaNameExtension, f'{name} with {type} is not supported yet.' + return dahlia_name + DahliaNameExtension[type] def get_dahlia_declaration(self, function_name, cells, args, attrs): """ From 3dd6a44debd0b67deec76301ae7fa5a73db2bd9b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 22 Nov 2020 14:58:19 -0500 Subject: [PATCH 39/75] Add support for dense operator. 
--- frontends/relay-futil/tests/data/dense.expect | 43 +++ frontends/relay-futil/tests/data/dense.relay | 5 + .../relay-futil/tests/data/dense.relay.data | 22 ++ frontends/relay-futil/tests/dense.expect | 334 ++++++++++++++++++ frontends/relay-futil/tests/dense.relay | 5 + 5 files changed, 409 insertions(+) create mode 100644 frontends/relay-futil/tests/data/dense.expect create mode 100644 frontends/relay-futil/tests/data/dense.relay create mode 100644 frontends/relay-futil/tests/data/dense.relay.data create mode 100644 frontends/relay-futil/tests/dense.expect create mode 100644 frontends/relay-futil/tests/dense.relay diff --git a/frontends/relay-futil/tests/data/dense.expect b/frontends/relay-futil/tests/data/dense.expect new file mode 100644 index 0000000000..279a161a27 --- /dev/null +++ b/frontends/relay-futil/tests/data/dense.expect @@ -0,0 +1,43 @@ +{ + "temporary_x20_0": [ + 3, + 3 + ], + "transpose_x10_0": [ + [ + 1, + 1 + ], + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ], + "x": [ + [ + 1, + 1, + 1 + ] + ], + "x1": [ + [ + 1, + 1, + 1 + ], + [ + 1, + 1, + 1 + ] + ], + "x2": [ + 3, + 3 + ] +} diff --git a/frontends/relay-futil/tests/data/dense.relay b/frontends/relay-futil/tests/data/dense.relay new file mode 100644 index 0000000000..edf56c4d30 --- /dev/null +++ b/frontends/relay-futil/tests/data/dense.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(1, 3), int32], %x1: Tensor[(2, 3), int32]) -> Tensor[(1, 2), int32] { + let %x2: Tensor[(1, 2), int32] = nn.dense(%x, %x1) /* ty=Tensor[(1, 2), int32] */; + %x2 +} diff --git a/frontends/relay-futil/tests/data/dense.relay.data b/frontends/relay-futil/tests/data/dense.relay.data new file mode 100644 index 0000000000..5ac288db59 --- /dev/null +++ b/frontends/relay-futil/tests/data/dense.relay.data @@ -0,0 +1,22 @@ +{ + "x": { + "data": [[1, 1, 1]], + "bitwidth": 32 + }, + "x1": { + "data": [[1, 1, 1], [1, 1, 1]], + "bitwidth": 32 + }, + "x2": { + "data": [0, 0], + "bitwidth": 32 + }, + "temporary_x20_0": { + "data": [0, 0], 
+ "bitwidth": 32 + }, + "transpose_x10_0": { + "data": [[0, 0], [0, 0], [0, 0]], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/dense.expect b/frontends/relay-futil/tests/dense.expect new file mode 100644 index 0000000000..a0d5ead2b1 --- /dev/null +++ b/frontends/relay-futil/tests/dense.expect @@ -0,0 +1,334 @@ +import "primitives/std.lib"; + +component dense(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 13, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 4, y0_0_addr1: 13, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { + cells { + add0 = prim std_add(13); + add1 = prim std_add(4); + add2 = prim std_add(32); + add3 = prim std_add(13); + add4 = prim std_add(4); + add5 = prim std_add(1); + add6 = prim std_add(4); + add7 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(4, 0); + const1 = prim std_const(4, 9); + const10 = prim std_const(13, 0); + const11 = prim std_const(13, 4095); + const12 = prim std_const(13, 1); + const13 = prim std_const(4, 1); + const14 = prim std_const(1, 1); + const15 = prim std_const(1, 0); + const16 = prim std_const(1, 0); + const17 = prim std_const(4, 0); + const18 = prim std_const(4, 9); + const19 = prim std_const(4, 1); + const2 = prim std_const(13, 0); + const20 = prim std_const(1, 1); + const3 = prim std_const(13, 4095); + const4 = prim std_const(13, 1); + const5 = prim std_const(4, 1); + const6 = prim std_const(1, 0); + const7 = prim std_const(1, 0); + const8 = prim std_const(4, 0); + const9 = prim std_const(4, 9); + i0 = prim std_reg(4); + i1 = prim std_reg(1); + i2 = prim std_reg(1); + j0 = prim std_reg(13); + j1 = prim std_reg(4); + j2 = prim std_reg(4); + k0 = prim std_reg(13); + le0 = prim std_le(4); + le1 = prim std_le(13); + le2 = prim 
std_le(1); + le3 = prim std_le(4); + le4 = prim std_le(13); + le5 = prim std_le(1); + le6 = prim std_le(4); + mult_pipe0 = prim std_mult_pipe(32); + product_0 = prim std_reg(32); + temporary_x10_0 = prim std_mem_d2(32, 1, 10, 1, 4); + temporary_x1_read0_0 = prim std_reg(32); + transpose_y0_0 = prim std_mem_d2(32, 4096, 10, 13, 4); + transpose_y_read0_0 = prim std_reg(32); + x_read0_0 = prim std_reg(32); + y_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = i1.out; + le2.right = const7.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = j1.out; + le3.right = const9.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = k0.out; + le4.right = const11.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = i2.out; + le5.right = const16.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = j2.out; + le6.right = const18.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + i1.in = const6.out; + i1.write_en = 1'd1; + let2[done] = i1.done; + } + group let3<"static"=1> { + j1.in = const8.out; + j1.write_en = 1'd1; + let3[done] = j1.done; + } + group let4<"static"=1> { + k0.in = const10.out; + k0.write_en = 1'd1; + let4[done] = k0.done; + } + group let5<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let5[done] = bin_read0_0.done; + mult_pipe0.left = x_read0_0.out; + mult_pipe0.right = transpose_y_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 
1'd1; + } + group let6<"static"=1> { + product_0.in = bin_read0_0.out; + product_0.write_en = 1'd1; + let6[done] = product_0.done; + } + group let7<"static"=1> { + i2.in = const15.out; + i2.write_en = 1'd1; + let7[done] = i2.done; + } + group let8<"static"=1> { + j2.in = const17.out; + j2.write_en = 1'd1; + let8[done] = j2.done; + } + group upd0<"static"=1> { + y_read0_0.write_en = 1'd1; + y0_0_addr1 = j0.out; + y0_0_addr0 = i0.out; + y_read0_0.in = 1'd1 ? y0_0_read_data; + upd0[done] = y_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + transpose_y0_0.addr1 = i0.out; + transpose_y0_0.addr0 = j0.out; + transpose_y0_0.write_en = 1'd1; + transpose_y0_0.write_data = 1'd1 ? y_read0_0.out; + upd1[done] = transpose_y0_0.done ? 1'd1; + } + group upd10<"static"=1> { + temporary_x1_read0_0.write_en = 1'd1; + temporary_x10_0.addr1 = j2.out; + temporary_x10_0.addr0 = i2.out; + temporary_x1_read0_0.in = 1'd1 ? temporary_x10_0.read_data; + upd10[done] = temporary_x1_read0_0.done ? 1'd1; + } + group upd11<"static"=1> { + x10_0_addr1 = j2.out; + x10_0_addr0 = i2.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? temporary_x1_read0_0.out; + upd11[done] = x10_0_done ? 1'd1; + } + group upd12<"static"=1> { + j2.write_en = 1'd1; + add6.left = j2.out; + add6.right = const19.out; + j2.in = 1'd1 ? add6.out; + upd12[done] = j2.done ? 1'd1; + } + group upd13<"static"=1> { + i2.write_en = 1'd1; + add7.left = i2.out; + add7.right = const20.out; + i2.in = 1'd1 ? add7.out; + upd13[done] = i2.done ? 1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const4.out; + j0.in = 1'd1 ? add0.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const5.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; + } + group upd4<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = k0.out; + x0_0_addr0 = i1.out; + x_read0_0.in = 1'd1 ? 
x0_0_read_data; + upd4[done] = x_read0_0.done ? 1'd1; + } + group upd5<"static"=1> { + transpose_y_read0_0.write_en = 1'd1; + transpose_y0_0.addr1 = j1.out; + transpose_y0_0.addr0 = k0.out; + transpose_y_read0_0.in = 1'd1 ? transpose_y0_0.read_data; + upd5[done] = transpose_y_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + temporary_x10_0.addr1 = j1.out; + temporary_x10_0.addr0 = i1.out; + temporary_x10_0.write_en = 1'd1; + add2.left = temporary_x10_0.read_data; + add2.right = product_0.out; + temporary_x10_0.addr1 = j1.out; + temporary_x10_0.addr0 = i1.out; + temporary_x10_0.write_data = 1'd1 ? add2.out; + upd6[done] = temporary_x10_0.done ? 1'd1; + } + group upd7<"static"=1> { + k0.write_en = 1'd1; + add3.left = k0.out; + add3.right = const12.out; + k0.in = 1'd1 ? add3.out; + upd7[done] = k0.done ? 1'd1; + } + group upd8<"static"=1> { + j1.write_en = 1'd1; + add4.left = j1.out; + add4.right = const13.out; + j1.in = 1'd1 ? add4.out; + upd8[done] = j1.done ? 1'd1; + } + group upd9<"static"=1> { + i1.write_en = 1'd1; + add5.left = i1.out; + add5.right = const14.out; + i1.in = 1'd1 ? add5.out; + upd9[done] = i1.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + upd3; + } + } + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + let4; + while le4.out with cond4 { + seq { + par { + upd4; + upd5; + } + let5; + let6; + upd6; + upd7; + } + } + upd8; + } + } + upd9; + } + } + let7; + while le5.out with cond5 { + seq { + let8; + while le6.out with cond6 { + seq { + upd10; + upd11; + upd12; + } + } + upd13; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 1, 10, 1, 4); + x = prim std_mem_d2(32, 1, 4096, 1, 13); + y = prim std_mem_d2(32, 10, 4096, 4, 13); + dense0 = dense; + } + wires { + group run_dense { + x.addr0 = dense0.x0_0_addr0; + dense0.x0_0_read_data = x.read_data; + x.addr1 = dense0.x0_0_addr1; + y.addr0 = dense0.y0_0_addr0; + dense0.y0_0_read_data = y.read_data; + y.addr1 = dense0.y0_0_addr1; + x1.addr0 = dense0.x10_0_addr0; + x1.addr1 = dense0.x10_0_addr1; + x1.write_data = dense0.x10_0_write_data; + x1.write_en = dense0.x10_0_write_en; + dense0.x10_0_done = x1.done; + dense0.go = 1'd1; + run_dense[done] = dense0.done ? 1'd1; + } + } + control { + seq { + run_dense; + } + } +} diff --git a/frontends/relay-futil/tests/dense.relay b/frontends/relay-futil/tests/dense.relay new file mode 100644 index 0000000000..5bd91be7a2 --- /dev/null +++ b/frontends/relay-futil/tests/dense.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(1, 4096), int32], %y: Tensor[(10, 4096), int32]) -> Tensor[(1, 10), int32] { + let %x1: Tensor[(1, 10), int32] = nn.dense(%x, %y, units=10) /* ty=Tensor[(1, 10), int32] */; + %x1 +} From c51c41a26fe3da41094b341a45f7b2e75fc6eb46 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 16:18:04 -0500 Subject: [PATCH 40/75] Implement broadcasting. 
--- frontends/relay-futil/compiler.py | 8 +- frontends/relay-futil/dahlia_functions.py | 278 +++++++++++------- frontends/relay-futil/example.py | 9 +- frontends/relay-futil/futil_ast.py | 6 +- frontends/relay-futil/tests/broadcast.expect | 171 +++++++++++ frontends/relay-futil/tests/broadcast.relay | 5 + .../relay-futil/tests/data/broadcast.expect | 70 +++++ .../relay-futil/tests/data/broadcast.relay | 5 + .../tests/data/broadcast.relay.data | 14 + 9 files changed, 448 insertions(+), 118 deletions(-) create mode 100644 frontends/relay-futil/tests/broadcast.expect create mode 100644 frontends/relay-futil/tests/broadcast.relay create mode 100644 frontends/relay-futil/tests/data/broadcast.expect create mode 100644 frontends/relay-futil/tests/data/broadcast.relay create mode 100644 frontends/relay-futil/tests/data/broadcast.relay.data diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index ae328b4144..cd9bcb77b1 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -12,12 +12,8 @@ # Mapping from Relay binary calls to the respective Dahlia operator. BuiltInBinaryOps = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} -# Mapping from Tensor dimensions to function type. -BinaryOpTensorDimensions = {PrimitiveType.Memory1D: tensor1d_op, PrimitiveType.Memory2D: tensor2d_op, - PrimitiveType.Memory3D: tensor3d_op, PrimitiveType.Memory4D: tensor4d_op} - # Mapping from Relay function names to their respective Dahlia lowering. -RelayFunctionCalls = {'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, +RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims} # Mapping between primitive type and associated Dahlia name extension. 
@@ -79,7 +75,7 @@ def get_dahlia_declaration(self, function_name, cells, args, attrs): function = name = op = None if function_name in BuiltInBinaryOps: op = BuiltInBinaryOps[function_name] - function, name = BinaryOpTensorDimensions[input_type], function_name + function, name = broadcast, function_name elif function_name in RelayFunctionCalls: function = RelayFunctionCalls[function_name] name = function.__name__ diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 0b42dfa13f..bf7d69c7df 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -6,10 +6,11 @@ IMPORT_STATEMENT = """import "primitives/std.lib";\n""" NO_ERR = "2>/dev/null" +CHARACTER_I = chr(ord('i')) def lower_dahlia_program(prog, component_name): - ''' + """ Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the respective component. @@ -31,7 +32,7 @@ def lower_dahlia_program(prog, component_name): (done: 1, X0_addr0: 2, X0_write_data: 32, X0_write_en: 1, X0_clk: 1) { ... 
} - ''' + """ program_string = '\n'.join(prog.splitlines()) with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: tf0.write(bytes(program_string, 'UTF-8')) @@ -45,123 +46,143 @@ def lower_dahlia_program(prog, component_name): return component -def tensor1d_op(declaration): - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, size, index_size, op2_size = op1.data[0], op1.data[1], op1.data[2], op2.data[1] - if size == op2_size: - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; - for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[i]; - }}""" - else: - # Broadcasting using a single value, e.g. - # let %a = 42; - # let %c = add(%b: Tensor[(512)], %a); - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{op2_size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; - for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i] := {op1.name}[i] {declaration.op} {op2.name}[0]; - }}""" - return lower_dahlia_program(program, declaration.component_name) - +def next_character(ch, dir=1): + """ + Returns the next character after 'ch'. + If dir is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. 
+ """ + return chr(ord(ch) + dir) if dir > 0 else chr(ord(ch) - 1) -def tensor2d_op(declaration): - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, size0, size1, index_size0, index_size1 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}]; - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - {res.name}[i][j] := {op1.name}[i][j] {declaration.op} {op2.name}[i][j]; - }} - }}""" - return lower_dahlia_program(program, declaration.component_name) +def broadcast(declaration): + """ + https://numpy.org/doc/stable/user/basics.broadcasting.html + Implements array broadcasting: + Two dimensions are compatible when either (1) they're equal, or (2) one of them is 1. + It is not required that both operands have the same number of dimensions either. + - When lowering from Relay IR, we are guaranteed the arrays are compatible for broadcasting. + - Variable names for indexing through the array begin with `i`, and continue alphabetically. -def tensor3d_op(declaration): - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, size0, size1, size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] - index_size0, index_size1, index_size2 = op1.data[4], op1.data[5], op1.data[6] + Example: + first operand: 64 x 1 x 32 + second operand: 16 x 1 + result: 64 x 16 x 32 + -> + for (i = 0...64) { + for (j = 0..16) { + for (k = 0..32) { + result[i][j][k] := op1[i][0][k] + op2[j][0]; + ... 
+ """ + operand1, operand2 = declaration.inputs[0].primitive, declaration.inputs[1].primitive + res = declaration.output.primitive + op1 = operand1 if operand1.type >= operand2.type else operand2 + op2 = operand2 if op1 == operand1 else operand1 + + op1_offset, op2_offset = op1.type, op2.type + op1_sizes, op2_sizes, res_sizes = [], [], [] + for i in reversed(range(1, op1_offset + 1)): op1_sizes.append(op1.data[i]) + for i in reversed(range(1, op2_offset + 1)): op2_sizes.append(op2.data[i]) + for i in range(0, len(op1_sizes)): + size = op1_sizes[i] + res_sizes.append(max(size, op2_sizes[i]) if i < len(op2_sizes) else size) + + op1_indices, op2_indices, res_indices = [], [], [] + # Get the character associated with 'i' + N, where N == Memory Dimensions + variable_name = chr(ord(CHARACTER_I) + op1_offset - 1) + for i in range(0, len(op1_sizes)): + current_dimension, index_zero = f'[{variable_name}]', '[0]' + res_indices.append(current_dimension) + if len(op2_sizes) <= i: + op1_indices.append(current_dimension) + continue + elif op1_sizes[i] == op2_sizes[i]: + op1_indices.append(current_dimension) + op2_indices.append(current_dimension) + elif op1_sizes[i] > op2_sizes[i]: + op1_indices.append(current_dimension) + op2_indices.append(index_zero) + else: # op2_sizes[i] < op1_sizes[i] + op1_indices.append(index_zero) + op2_indices.append(current_dimension) + variable_name = next_character(variable_name, -1) + + op1_nth_index, op2_nth_index = ''.join(reversed(op1_indices)), ''.join(reversed(op2_indices)) + res_nth_index = ''.join(reversed(res_indices)) + + # Declarations for op1, op2, res. 
+ op1_decl = f'decl {op1.name}: {op1.data_type}<{op1.data[0]}>' + for i in reversed(range(0, len(op1_sizes))): op1_decl += f'[{op1_sizes[i]}]' + + op2_decl = f'decl {op2.name}: {op2.data_type}<{op2.data[0]}>' + for i in reversed(range(0, len(op2_sizes))): op2_decl += f'[{op2_sizes[i]}]' + + res_decl = f'decl {res.name}: {res.data_type}<{res.data[0]}>' + for i in reversed(range(0, len(res_sizes))): res_decl += f'[{res_sizes[i]}]' + + # For loop(s). + variable_name = CHARACTER_I + loop_body = [] + for i in range(1, len(op1_sizes) + 1): + size, index_size = res.data[i], res.data[i + op1_offset] + if (i + op2_offset < len(op2_sizes)): + op2_size, op2_index_size = op2.data[i], op2.data[i + op2_offset] + size, index_size = max(size, op2_size), max(size, op2_index_size) + loop_body.append(f'for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') + variable_name = next_character(variable_name) + loop_body.append( + f'{res.name}{res_nth_index} := {op1.name}{op1_nth_index} {declaration.op} {op2.name}{op2_nth_index};') + for i in range(1, len(op1_sizes) + 1): loop_body.append('}') program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let k: ubit<{index_size2}> = 0..{size2}) {{ - {res.name}[i][j][k] := {op1.name}[i][j][k] {declaration.op} {op2.name}[i][j][k]; - }} - }} - }}""" - return lower_dahlia_program(program, declaration.component_name) - - -def tensor4d_op(declaration): - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, size0, size1, size2, size3 = op1.data[0], op1.data[1], op1.data[2], op1.data[3], op1.data[4] - index_size0, index_size1, index_size2, index_size3 = op1.data[5], op1.data[6], 
op1.data[7], op1.data[8] - if op1.type == op2.type: - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let k: ubit<{index_size2}> = 0..{size2}) {{ - for (let l: ubit<{index_size3}> = 0..{size3}) {{ - {res.name}[i][j][k][l] := {op1.name}[i][j][k][l] {declaration.op} {op2.name}[i][j][k][l]; - }} - }} - }} - }}""" - else: # Broadcasting. - op2_size0, op2_size1, op2_size2 = op2.data[1], op2.data[2], op2.data[3] - op2_index_size0, op2_index_size1, op2_index_size2 = op2.data[3], op2.data[5], op2.data[6] - # TODO(cgyurgyik): This is defaulted to: `Tensor(X, Y, 1, 1) op Tensor(Y, 1, 1)` for VGG Net. - # This should be generalized. - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{op2_size0}][{op2_size1}][{op2_size2}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let k: ubit<{index_size2}> = 0..{size2}) {{ - for (let l: ubit<{index_size3}> = 0..{size3}) {{ - {res.name}[i][j][k][l] := {op1.name}[i][j][k][l] {declaration.op} {op2.name}[j][0][0]; - }} - }} - }} - }}""" + {op1_decl}; + {op2_decl}; + {res_decl}; + {' '.join(loop_body)} + """ return lower_dahlia_program(program, declaration.component_name) def batch_flatten(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" op1, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, op1_size0, op1_size1, op1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] - op1_index_size0, 
op1_index_size1, op1_index_size2 = op1.data[4], op1.data[5], op1.data[6] - res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] + bitwidth, res_bitwidth, res_size0, res_size1 = op1.data[0], res.data[0], res.data[1], res.data[2] res_index_size0, res_index_size1 = res.data[3], res.data[4] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; - let l: ubit<{res_index_size1}> = 0; - for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ - for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ - for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ - {res.name}[i][l] := {op1.name}[i][j][k]; - l := l + 1; - }} - }} - }}""" - return lower_dahlia_program(program, declaration.component_name) + + if op1.type == PrimitiveType.Memory3D: + op1_size0, op1_size1, op1_size2 = op1.data[1], op1.data[2], op1.data[3] + op1_index_size0, op1_index_size1, op1_index_size2 = op1.data[4], op1.data[5], op1.data[6] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; + let l: ubit<{res_index_size1}> = 0; + for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ + for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ + for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ + {res.name}[i][l] := {op1.name}[i][j][k]; + l := l + 1; + }} + }} + }}""" + return lower_dahlia_program(program, declaration.component_name) + if op1.type == PrimitiveType.Memory4D: + op1_size0, op1_size1, op1_size2, op1_size3 = op1.data[1], op1.data[2], op1.data[3], op1.data[4] + op1_index_size0, op1_index_size1 = op1.data[5], op1.data[6] + op1_index_size2, op1_index_size3 = op1.data[7], op1.data[8] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}][{op1_size3}]; + decl {res.name}: 
{res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; + let l: ubit<{res_index_size1}> = 0; + for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ + for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ + for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ + for (let l: ubit<{op1_index_size3}> = 0..{op1_size3}) {{ + {res.name}[i][l] := {op1.name}[i][j][k][l]; + l := l + 1; + }} + }} + }} + }}""" + return lower_dahlia_program(program, declaration.component_name) def bias_add(declaration): @@ -352,3 +373,44 @@ def batch_matmul(declaration): }} """ return lower_dahlia_program(program, declaration.component_name) + + +# TODO(cgyurgyik): Similar to batch_matmul, this requires a temporary memory to store the output +# of the matrix multiply. Otherwise, the values aren't computed properly. Look deeper into this. +def dense(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" + # TODO(cgyurgyik): Add support for `units`. + units = declaration.attributes.get_int("units") + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + bitwidth, M1_size0, M1_size1 = op1.data[0], op1.data[1], op1.data[2] + M1_index_size0, M1_index_size1 = op1.data[3], op1.data[4] + M2_size0, M2_size1, M2_index_size0, M2_index_size1 = op2.data[1], op2.data[2], op2.data[3], op2.data[4] + program = f""" + decl {op1.name}: {op1.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}]; + decl {op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size1}]; + decl {res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; + let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size1}][{M2_size0}]; + let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; + for (let i: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ + for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + transpose_{op2.name}[j][i] := {op2.name}[i][j]; + }} + }} + + for (let i: ubit<{M1_index_size0}> = 
0..{M1_size0}) {{ + for (let j: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ + for (let k: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + let product = {op1.name}[i][k] * transpose_{op2.name}[k][j]; + }} combine {{ + temporary_{res.name}[i][j] += product; + }} + }} + }} + + for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let j: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ + {res.name}[i][j] := temporary_{res.name}[i][j]; + }} + }} + """ + return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 45e18b387c..0a986e9453 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -9,6 +9,7 @@ def add(): y = relay.var('y', shape=(), dtype="int32") return relay.Function([x, y], relay.add(x, y)) + def tensor_subtract(): x = relay.var("x", relay.TensorType((2, 4), "int32")) y = relay.var("y", relay.TensorType((2, 4), "int32")) @@ -42,6 +43,12 @@ def relu(): return relay.Function([x], relay.nn.relu(x)) +def dense(): + x = relay.var('x', shape=[1, 4096], dtype='int32') + y = relay.var('y', shape=[10, 4096], dtype='int32') + return relay.Function([x, y], relay.nn.dense(x, y, units=10)) + + def mlp_net(): """The MLP test from Relay.""" from tvm.relay.testing import mlp @@ -55,7 +62,7 @@ def vgg_net(): batch_norm=True) -ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, mlp_net, vgg_net] +ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, dense, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 9432a62a4d..03f2e8fa0e 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -2,10 +2,10 @@ from dataclasses import dataclass from typing import List, Dict from types import FunctionType -from enum import Enum +from enum 
import Enum, IntEnum - -class PrimitiveType(Enum): +# Note: The integer value N for Memory with dimension N is used; these should remain unchanged. +class PrimitiveType(IntEnum): Memory1D = 1 Memory2D = 2 Memory3D = 3 diff --git a/frontends/relay-futil/tests/broadcast.expect b/frontends/relay-futil/tests/broadcast.expect new file mode 100644 index 0000000000..9527534871 --- /dev/null +++ b/frontends/relay-futil/tests/broadcast.expect @@ -0,0 +1,171 @@ +import "primitives/std.lib"; + +component add(go: 1, clk: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_0_addr0: 1, x10_0_0_addr1: 2, x10_0_0_addr2: 2, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(2); + add2 = prim std_add(2); + add3 = prim std_add(2); + const0 = prim std_const(2, 0); + const1 = prim std_const(2, 1); + const10 = prim std_const(2, 1); + const11 = prim std_const(2, 1); + const2 = prim std_const(2, 0); + const3 = prim std_const(2, 1); + const4 = prim std_const(2, 0); + const5 = prim std_const(2, 1); + const6 = prim std_const(1, 0); + const7 = prim std_const(1, 0); + const8 = prim std_const(1, 0); + const9 = prim std_const(2, 1); + i0 = prim std_reg(2); + j0 = prim std_reg(2); + k0 = prim std_reg(2); + le0 = prim std_le(2); + le1 = prim std_le(2); + le2 = prim std_le(2); + x1_read0_0 = prim std_reg(32); + x2_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = 
k0.out; + le2.right = const5.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + k0.in = const4.out; + k0.write_en = 1'd1; + let2[done] = k0.done; + } + group upd0<"static"=1> { + x1_read0_0.write_en = 1'd1; + x10_0_0_addr2 = k0.out; + x10_0_0_addr1 = j0.out; + x10_0_0_addr0 = const6.out; + x1_read0_0.in = 1'd1 ? x10_0_0_read_data; + upd0[done] = x1_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + x2_read0_0.write_en = 1'd1; + x20_0_0_addr2 = const8.out; + x20_0_0_addr1 = const7.out; + x20_0_0_addr0 = i0.out; + x2_read0_0.in = 1'd1 ? x20_0_0_read_data; + upd1[done] = x2_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + x30_0_0_addr2 = k0.out; + x30_0_0_addr1 = j0.out; + x30_0_0_addr0 = i0.out; + x30_0_0_write_en = 1'd1; + add0.left = x1_read0_0.out; + add0.right = x2_read0_0.out; + x30_0_0_write_data = 1'd1 ? add0.out; + upd2[done] = x30_0_0_done ? 1'd1; + } + group upd3<"static"=1> { + k0.write_en = 1'd1; + add1.left = k0.out; + add1.right = const9.out; + k0.in = 1'd1 ? add1.out; + upd3[done] = k0.done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add2.left = j0.out; + add2.right = const10.out; + j0.in = 1'd1 ? add2.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add3.left = i0.out; + add3.right = const11.out; + i0.in = 1'd1 ? add3.out; + upd5[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + } +} + +component main () -> () { + cells { + x3 = prim std_mem_d3(32, 2, 2, 2, 2, 2, 2); + x1 = prim std_mem_d3(32, 1, 2, 2, 1, 2, 2); + x2 = prim std_mem_d3(32, 2, 1, 1, 2, 1, 1); + add0 = add; + } + wires { + group run_add { + x1.addr0 = add0.x10_0_0_addr0; + add0.x10_0_0_read_data = x1.read_data; + x1.addr1 = add0.x10_0_0_addr1; + x1.addr2 = add0.x10_0_0_addr2; + x2.addr0 = add0.x20_0_0_addr0; + add0.x20_0_0_read_data = x2.read_data; + x2.addr1 = add0.x20_0_0_addr1; + x2.addr2 = add0.x20_0_0_addr2; + x3.addr0 = add0.x30_0_0_addr0; + x3.addr1 = add0.x30_0_0_addr1; + x3.addr2 = add0.x30_0_0_addr2; + x3.write_data = add0.x30_0_0_write_data; + x3.write_en = add0.x30_0_0_write_en; + add0.x30_0_0_done = x3.done; + add0.go = 1'd1; + run_add[done] = add0.done ? 
1'd1; + } + } + control { + seq { + run_add; + } + } +} diff --git a/frontends/relay-futil/tests/broadcast.relay b/frontends/relay-futil/tests/broadcast.relay new file mode 100644 index 0000000000..bacd708118 --- /dev/null +++ b/frontends/relay-futil/tests/broadcast.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x1: Tensor[(1, 2, 2), int32], %x2: Tensor[(2, 1, 1), int32]) { + let %x3 = add(%x1, %x2); + %x3 +} diff --git a/frontends/relay-futil/tests/data/broadcast.expect b/frontends/relay-futil/tests/data/broadcast.expect new file mode 100644 index 0000000000..b44c484e80 --- /dev/null +++ b/frontends/relay-futil/tests/data/broadcast.expect @@ -0,0 +1,70 @@ +{ + "x1": [ + [ + [ + 1 + ], + [ + 1 + ] + ], + [ + [ + 2 + ], + [ + 2 + ] + ] + ], + "x2": [ + [ + [ + 1 + ] + ], + [ + [ + 1 + ] + ] + ], + "x3": [ + [ + [ + [ + 2 + ], + [ + 2 + ] + ], + [ + [ + 3 + ], + [ + 3 + ] + ] + ], + [ + [ + [ + 2 + ], + [ + 2 + ] + ], + [ + [ + 3 + ], + [ + 3 + ] + ] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/broadcast.relay b/frontends/relay-futil/tests/data/broadcast.relay new file mode 100644 index 0000000000..bacd708118 --- /dev/null +++ b/frontends/relay-futil/tests/data/broadcast.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x1: Tensor[(1, 2, 2), int32], %x2: Tensor[(2, 1, 1), int32]) { + let %x3 = add(%x1, %x2); + %x3 +} diff --git a/frontends/relay-futil/tests/data/broadcast.relay.data b/frontends/relay-futil/tests/data/broadcast.relay.data new file mode 100644 index 0000000000..e2f914b5c5 --- /dev/null +++ b/frontends/relay-futil/tests/data/broadcast.relay.data @@ -0,0 +1,14 @@ +{ + "x1": { + "data": [[[1], [1]], [[2], [2]]], + "bitwidth": 32 + }, + "x2": { + "data": [[[1]], [[1]]], + "bitwidth": 32 + }, + "x3": { + "data": [[ [[0], [0]], [[0], [0]] ], [ [[0], [0]], [[0], [0]] ]], + "bitwidth": 32 + } +} \ No newline at end of file From 96654d463bd4fae3086c5a82e3e45e9d52d47cf0 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 16:23:33 -0500 Subject: [PATCH 41/75] 
Rename mapping. --- frontends/relay-futil/utilities.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index faaccf1fe7..44e9f9c4f9 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -3,7 +3,7 @@ import math # Mapping from the tensor dimensions to the corresponding FuTIL memory type. -TensorToMemoryDimensionMapping = {1: PrimitiveType.Memory1D, 2: PrimitiveType.Memory2D, +NumDimensionsToPrimitive = {1: PrimitiveType.Memory1D, 2: PrimitiveType.Memory2D, 3: PrimitiveType.Memory3D, 4: PrimitiveType.Memory4D} @@ -65,10 +65,10 @@ def get_memory_parameters(type): tensor_dimensions = list(map(int, string_dimensions.split(','))) data, num_dimensions = [get_bitwidth(string_type)], len(tensor_dimensions) - assert num_dimensions in TensorToMemoryDimensionMapping, f'{num_dimensions} dimensions is not supported.' + assert num_dimensions in NumDimensionsToPrimitive, f'{num_dimensions} dimensions is not supported.' for dimension in tensor_dimensions: data.append(dimension) # Size. for dimension in tensor_dimensions: data.append(int(math.log2(dimension) + 1)) # Index size. - return data, TensorToMemoryDimensionMapping[num_dimensions], data_type + return data, NumDimensionsToPrimitive[num_dimensions], data_type def build_main_controls(c: FComponent): From a8a90395a6f1b70543ead3355a3c6ef827b5c12f Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 17:04:12 -0500 Subject: [PATCH 42/75] Fix spacing. --- frontends/relay-futil/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 44e9f9c4f9..aab5fd2a72 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -4,7 +4,7 @@ # Mapping from the tensor dimensions to the corresponding FuTIL memory type. 
NumDimensionsToPrimitive = {1: PrimitiveType.Memory1D, 2: PrimitiveType.Memory2D, - 3: PrimitiveType.Memory3D, 4: PrimitiveType.Memory4D} + 3: PrimitiveType.Memory3D, 4: PrimitiveType.Memory4D} def flatten(l): From 52d0b9014d3c33002acc022173692167545c6bdf Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 17:09:32 -0500 Subject: [PATCH 43/75] Fix dahlia naming. --- frontends/relay-futil/compiler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index cd9bcb77b1..75dc565df9 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -17,9 +17,9 @@ 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims} # Mapping between primitive type and associated Dahlia name extension. -# E.g. A 2D memory array named `A` will be lowered to `A_0`. -DahliaNameExtension = {PrimitiveType.Memory1D: '', PrimitiveType.Memory2D: '_0', - PrimitiveType.Memory3D: '_0_0', PrimitiveType.Memory4D: '_0_0_0'} +# E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. +DahliaNameExtension = {PrimitiveType.Memory1D: '0', PrimitiveType.Memory2D: '0_0', + PrimitiveType.Memory3D: '0_0_0', PrimitiveType.Memory4D: '0_0_0_0'} class Relay2Futil(ExprFunctor): @@ -62,9 +62,8 @@ def dahlia_name(self, name, type): Memory2D: 'X0_0', 'X1_0', 'X2_0', ... Memory3D: 'X0_0_0', 'X1_0_0', 'X2_0_0', ... """ - dahlia_name = self.id(name) assert type in DahliaNameExtension, f'{name} with {type} is not supported yet.' - return dahlia_name + DahliaNameExtension[type] + return name + DahliaNameExtension[type] def get_dahlia_declaration(self, function_name, cells, args, attrs): """ From 0ff11ece949ca6343fcad044947b04756867cb37 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 17:17:46 -0500 Subject: [PATCH 44/75] Cleanup. 
--- frontends/relay-futil/dahlia_functions.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index bf7d69c7df..5b6b397f0a 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -88,7 +88,7 @@ def broadcast(declaration): res_sizes.append(max(size, op2_sizes[i]) if i < len(op2_sizes) else size) op1_indices, op2_indices, res_indices = [], [], [] - # Get the character associated with 'i' + N, where N == Memory Dimensions + # Get the character associated with 'i' + N, where N == number of dimensions in `op1`. variable_name = chr(ord(CHARACTER_I) + op1_offset - 1) for i in range(0, len(op1_sizes)): current_dimension, index_zero = f'[{variable_name}]', '[0]' @@ -112,12 +112,10 @@ def broadcast(declaration): # Declarations for op1, op2, res. op1_decl = f'decl {op1.name}: {op1.data_type}<{op1.data[0]}>' - for i in reversed(range(0, len(op1_sizes))): op1_decl += f'[{op1_sizes[i]}]' - op2_decl = f'decl {op2.name}: {op2.data_type}<{op2.data[0]}>' - for i in reversed(range(0, len(op2_sizes))): op2_decl += f'[{op2_sizes[i]}]' - res_decl = f'decl {res.name}: {res.data_type}<{res.data[0]}>' + for i in reversed(range(0, len(op1_sizes))): op1_decl += f'[{op1_sizes[i]}]' + for i in reversed(range(0, len(op2_sizes))): op2_decl += f'[{op2_sizes[i]}]' for i in reversed(range(0, len(res_sizes))): res_decl += f'[{res_sizes[i]}]' # For loop(s). From 09a20193cf39185512a9df092cb5cb1fa893a865 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 17:25:56 -0500 Subject: [PATCH 45/75] CLeanup. 
--- frontends/relay-futil/dahlia_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 5b6b397f0a..2af92cd2a8 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -107,8 +107,9 @@ def broadcast(declaration): op2_indices.append(current_dimension) variable_name = next_character(variable_name, -1) - op1_nth_index, op2_nth_index = ''.join(reversed(op1_indices)), ''.join(reversed(op2_indices)) - res_nth_index = ''.join(reversed(res_indices)) + # Resulting index in the nested for loop, e.g. for op1[i][j][0][k], this is `[i][j][0][k]`. + op1_index, op2_index = ''.join(reversed(op1_indices)), ''.join(reversed(op2_indices)) + res_index = ''.join(reversed(res_indices)) # Declarations for op1, op2, res. op1_decl = f'decl {op1.name}: {op1.data_type}<{op1.data[0]}>' @@ -128,8 +129,8 @@ def broadcast(declaration): size, index_size = max(size, op2_size), max(size, op2_index_size) loop_body.append(f'for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') variable_name = next_character(variable_name) - loop_body.append( - f'{res.name}{res_nth_index} := {op1.name}{op1_nth_index} {declaration.op} {op2.name}{op2_nth_index};') + loop_body.append(f'{res.name}{res_index} := {op1.name}{op1_index} {declaration.op} {op2.name}{op2_index};') + for i in range(1, len(op1_sizes) + 1): loop_body.append('}') program = f""" {op1_decl}; From 9c41278da21080d803d3aea331cc74b1db437bd6 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 23 Nov 2020 17:30:09 -0500 Subject: [PATCH 46/75] Add todo for supporting axis=-1. 
--- frontends/relay-futil/dahlia_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 2af92cd2a8..ef76e9a709 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -187,6 +187,8 @@ def batch_flatten(declaration): def bias_add(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" axis = declaration.attributes.get_int("axis") + # TODO(cgyurgyik): Supported axis = -1. + assert axis == 0 or axis == 1, f'bias_add with axis: {axis} is not currently supported.' data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth = data.data[0] if data.type == PrimitiveType.Memory2D: From 98589f2a6dac048e13076963dfe7c76a96e86923 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 24 Nov 2020 08:10:18 -0500 Subject: [PATCH 47/75] Begin generalizing functions for any tensor size. 
--- frontends/relay-futil/dahlia_functions.py | 241 ++++++---------------- frontends/relay-futil/pretty_print.py | 51 +++++ 2 files changed, 118 insertions(+), 174 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index ef76e9a709..6b0e7496a9 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -3,10 +3,12 @@ from tempfile import NamedTemporaryFile, TemporaryFile from futil_ast import * +from pretty_print import * IMPORT_STATEMENT = """import "primitives/std.lib";\n""" NO_ERR = "2>/dev/null" CHARACTER_I = chr(ord('i')) +NEWL = '\n' def lower_dahlia_program(prog, component_name): @@ -79,18 +81,17 @@ def broadcast(declaration): op1 = operand1 if operand1.type >= operand2.type else operand2 op2 = operand2 if op1 == operand1 else operand1 - op1_offset, op2_offset = op1.type, op2.type + op1_dims, op2_dims, res_dims = op1.type, op2.type, res.type op1_sizes, op2_sizes, res_sizes = [], [], [] - for i in reversed(range(1, op1_offset + 1)): op1_sizes.append(op1.data[i]) - for i in reversed(range(1, op2_offset + 1)): op2_sizes.append(op2.data[i]) - for i in range(0, len(op1_sizes)): - size = op1_sizes[i] - res_sizes.append(max(size, op2_sizes[i]) if i < len(op2_sizes) else size) + # Get memory sizes in reversed order. + for i in reversed(range(0, op1_dims)): op1_sizes.append(op1.data[i + 1]) + for i in reversed(range(0, op2_dims)): op2_sizes.append(op2.data[i + 1]) + for i in reversed(range(0, res_dims)): res_sizes.append(res.data[i + 1]) op1_indices, op2_indices, res_indices = [], [], [] # Get the character associated with 'i' + N, where N == number of dimensions in `op1`. 
- variable_name = chr(ord(CHARACTER_I) + op1_offset - 1) - for i in range(0, len(op1_sizes)): + variable_name = chr(ord(CHARACTER_I) + op1_dims - 1) + for i in range(0, len(res_sizes)): current_dimension, index_zero = f'[{variable_name}]', '[0]' res_indices.append(current_dimension) if len(op2_sizes) <= i: @@ -110,129 +111,60 @@ def broadcast(declaration): # Resulting index in the nested for loop, e.g. for op1[i][j][0][k], this is `[i][j][0][k]`. op1_index, op2_index = ''.join(reversed(op1_indices)), ''.join(reversed(op2_indices)) res_index = ''.join(reversed(res_indices)) + loop_body = f'{res.name}{res_index} := {op1.name}{op1_index} {declaration.op} {op2.name}{op2_index};' - # Declarations for op1, op2, res. - op1_decl = f'decl {op1.name}: {op1.data_type}<{op1.data[0]}>' - op2_decl = f'decl {op2.name}: {op2.data_type}<{op2.data[0]}>' - res_decl = f'decl {res.name}: {res.data_type}<{res.data[0]}>' - for i in reversed(range(0, len(op1_sizes))): op1_decl += f'[{op1_sizes[i]}]' - for i in reversed(range(0, len(op2_sizes))): op2_decl += f'[{op2_sizes[i]}]' - for i in reversed(range(0, len(res_sizes))): res_decl += f'[{res_sizes[i]}]' - - # For loop(s). 
- variable_name = CHARACTER_I - loop_body = [] - for i in range(1, len(op1_sizes) + 1): - size, index_size = res.data[i], res.data[i + op1_offset] - if (i + op2_offset < len(op2_sizes)): - op2_size, op2_index_size = op2.data[i], op2.data[i + op2_offset] - size, index_size = max(size, op2_size), max(size, op2_index_size) - loop_body.append(f'for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') - variable_name = next_character(variable_name) - loop_body.append(f'{res.name}{res_index} := {op1.name}{op1_index} {declaration.op} {op2.name}{op2_index};') - - for i in range(1, len(op1_sizes) + 1): loop_body.append('}') - program = f""" - {op1_decl}; - {op2_decl}; - {res_decl}; - {' '.join(loop_body)} - """ + program_body = pp_dahlia_loop(res, loop_body) + declarations = pp_dahlia_memory_declarations([res, op1, op2]) + program = f"""{declarations}{NEWL}{program_body}""" return lower_dahlia_program(program, declaration.component_name) def batch_flatten(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" - op1, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, res_bitwidth, res_size0, res_size1 = op1.data[0], res.data[0], res.data[1], res.data[2] - res_index_size0, res_index_size1 = res.data[3], res.data[4] + data, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, num_dimensions = data.data[0], data.type + res_index_size1 = res.data[4] - if op1.type == PrimitiveType.Memory3D: - op1_size0, op1_size1, op1_size2 = op1.data[1], op1.data[2], op1.data[3] - op1_index_size0, op1_index_size1, op1_index_size2 = op1.data[4], op1.data[5], op1.data[6] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; - let l: ubit<{res_index_size1}> = 0; - for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ - for (let j: ubit<{op1_index_size1}> = 
0..{op1_size1}) {{ - for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ - {res.name}[i][l] := {op1.name}[i][j][k]; - l := l + 1; - }} - }} - }}""" - return lower_dahlia_program(program, declaration.component_name) - if op1.type == PrimitiveType.Memory4D: - op1_size0, op1_size1, op1_size2, op1_size3 = op1.data[1], op1.data[2], op1.data[3], op1.data[4] - op1_index_size0, op1_index_size1 = op1.data[5], op1.data[6] - op1_index_size2, op1_index_size3 = op1.data[7], op1.data[8] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}][{op1_size3}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; - let l: ubit<{res_index_size1}> = 0; - for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ - for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ - for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ - for (let l: ubit<{op1_index_size3}> = 0..{op1_size3}) {{ - {res.name}[i][l] := {op1.name}[i][j][k][l]; - l := l + 1; - }} - }} - }} - }}""" - return lower_dahlia_program(program, declaration.component_name) + variable_name = CHARACTER_I + data_indices, res_indices = "", f'[{variable_name}]' + for i in range(0, num_dimensions): + # Determine loop body indices based on `axis` provided. 
+ size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] + index = f'[{variable_name}]' + data_indices += index + variable_name = next_character(variable_name) + res_indices += f'[{variable_name}]' + + declarations = pp_dahlia_memory_declarations([data, res]) + let_flattened = f'let {variable_name}: ubit<{res_index_size1}> = 0;' + body = (f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;") + loops = pp_dahlia_loop(data, body) + program = f"""{declarations}{NEWL}{let_flattened}{NEWL}{loops}""" + return lower_dahlia_program(program, declaration.component_name) def bias_add(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" - axis = declaration.attributes.get_int("axis") - # TODO(cgyurgyik): Supported axis = -1. - assert axis == 0 or axis == 1, f'bias_add with axis: {axis} is not currently supported.' data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth = data.data[0] - if data.type == PrimitiveType.Memory2D: - size0, size1, index_size0, index_size1 = data.data[1], data.data[2], data.data[3], data.data[4] - bias_size, bias_index_size = bias.data[1], bias.data[2] - program = f""" - decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}]; - decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}];""" - if axis == 1: - program += f""" - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - {res.name}[i][j] := {data.name}[i][j] + {bias.name}[j]; - }} - }}""" - elif axis == 0: - program += f""" - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - {res.name}[i][j] := {data.name}[i][j] + {bias.name}[i]; - }} - }}""" - elif data.type == PrimitiveType.Memory4D: - bitwidth, size0, size1, size2, size3 = data.data[0], data.data[1], 
data.data[2], data.data[3], data.data[4] - index_size0, index_size1, index_size2, index_size3 = data.data[5], data.data[6], data.data[7], data.data[8] - bias_size, bias_index_size = bias.data[1], bias.data[2] - program = f""" - decl {data.name}: {data.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}]; - decl {bias.name}: {bias.data_type}<{bitwidth}>[{bias_size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}][{size3}];""" - if axis == 1: - program += f""" - for (let i: ubit<{index_size0}> = 0..{size0}) {{ - for (let j: ubit<{index_size1}> = 0..{size1}) {{ - for (let k: ubit<{index_size2}> = 0..{size2}) {{ - for (let l: ubit<{index_size3}> = 0..{size3}) {{ - {res.name}[i][j][k][l] := {data.name}[i][j][k][l] + {bias.name}[j]; - }} - }} - }} - }}""" + bitwidth, num_dimensions = data.data[0], data.type - return lower_dahlia_program(program, declaration.component_name) + axis_attribute = declaration.attributes.get_int("axis") + axis = num_dimensions - 1 if axis_attribute == -1 else axis_attribute + + variable_name = CHARACTER_I + data_indices = "" + for i in range(0, num_dimensions): + # Determine loop body indices based on `axis` provided. 
+ size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] + index = f'[{variable_name}]' + if axis == i: bias_index = index + data_indices += index + variable_name = next_character(variable_name) + + declarations = pp_dahlia_memory_declarations([data, bias, res]) + body = (f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};") + loops = pp_dahlia_loop(data, body) + return lower_dahlia_program(f"""{declarations}{NEWL}{loops}""", declaration.component_name) # TODO(cgyurgyik): @@ -242,56 +174,23 @@ def bias_add(declaration): def relu(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" op1, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, num_dimensions = op1.data[0], op1.type assert res.data_type == 'ubit', f'{res.data_type} is not currently supported for ReLU.' - if op1.type == PrimitiveType.Memory2D: - bitwidth, op1_size0, op1_size1 = op1.data[0], op1.data[1], op1.data[2] - op1_index_size0, op1_index_size1 = op1.data[3], op1.data[4] - res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] - res_index_size0, res_index_size1 = res.data[3], res.data[4] - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}]; - let zero: {op1.data_type}<{bitwidth}> = 0; - for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ - for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ - if ({op1.name}[i][j] > zero) {{ - {res.name}[i][j] := {op1.name}[i][j]; - }} else {{ - {res.name}[i][j] := 0; - }} - }} - }} - """ - return lower_dahlia_program(program, declaration.component_name) + let_zero = f'let zero: {op1.data_type}<{bitwidth}> = 0;' + declarations = pp_dahlia_memory_declarations([op1, res]) - elif op1.type == PrimitiveType.Memory4D: - bitwidth, op1_size0, op1_size1 = op1.data[0], op1.data[1], op1.data[2] - op1_size2, op1_size3, 
op1_index_size0, = op1.data[3], op1.data[4], op1.data[5] - op1_index_size1, op1_index_size2, op1_index_size3 = op1.data[6], op1.data[7], op1.data[8] - res_bitwidth, res_size0, res_size1 = res.data[0], res.data[1], res.data[2] - res_size2, res_size3, res_index_size0, res_index_size1 = res.data[3], res.data[4], res.data[5], res.data[6] - res_index_size2, res_index_size3 = res.data[7], res.data[8] + indices = "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + indices += f'[{variable_name}]' + variable_name = next_character(variable_name) - program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{op1_size0}][{op1_size1}][{op1_size2}][{op1_size3}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{res_size0}][{res_size1}][{op1_size2}][{op1_size3}]; - let zero: {op1.data_type}<{bitwidth}> = 0; - for (let i: ubit<{op1_index_size0}> = 0..{op1_size0}) {{ - for (let j: ubit<{op1_index_size1}> = 0..{op1_size1}) {{ - for (let k: ubit<{op1_index_size2}> = 0..{op1_size2}) {{ - for (let l: ubit<{op1_index_size3}> = 0..{op1_size3}) {{ - if ({op1.name}[i][j][k][l] > zero) {{ - {res.name}[i][j][k][l] := {op1.name}[i][j][k][l]; - }} else {{ - {res.name}[i][j][k][l] := 0; - }} - }} - }} - }} - }} - """ - return lower_dahlia_program(program, declaration.component_name) + body = f"""if ({op1.name}{indices} > zero) {{ {res.name}{indices} := {op1.name}{indices}; }} + else {{ {res.name}{indices} := 0; }}""" + loops = pp_dahlia_loop(op1, body) + return lower_dahlia_program(f"""{declarations}{NEWL}{let_zero}{NEWL}{loops}""", declaration.component_name) # TODO(cgyurgyik): Similar to ReLU, this requires signed operands. 
@@ -300,8 +199,7 @@ def negative(declaration): op1, res = declaration.inputs[0].primitive, declaration.output.primitive bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size}]; + {pp_dahlia_memory_declarations([res, op1])} for (let i: ubit<{index_size}> = 0..{size}) {{ {res.name}[i] := -{op1.name}[i]; }} @@ -318,8 +216,7 @@ def expand_dims(declaration): index_size0, index_size1, index_size2 = res.data[4], res.data[5], res.data[6] if axis == 1 and num_newaxis == 2: program = f""" - decl {data.name}: {data.data_type}<{bitwidth}>[{size}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{size0}][{size1}][{size2}]; + {pp_dahlia_memory_declarations([res, data])} for (let i: ubit<{index_size}> = 0..{size}) {{ {res.name}[i][0][0] := {data.name}[i]; }} @@ -340,9 +237,7 @@ def batch_matmul(declaration): # * This third step may not be necessary, but trying to conduct the matrix multiply # directly with the return value declared resulted in incorrect outputs. 
program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M1_size2}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size1}][{M2_size2}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; + {pp_dahlia_memory_declarations([res, op1, op2])} let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size2}][{M2_size1}]; let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ @@ -387,9 +282,7 @@ def dense(declaration): M1_index_size0, M1_index_size1 = op1.data[3], op1.data[4] M2_size0, M2_size1, M2_index_size0, M2_index_size1 = op2.data[1], op2.data[2], op2.data[3], op2.data[4] program = f""" - decl {op1.name}: {op1.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}]; - decl {op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size1}]; - decl {res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; + {pp_dahlia_memory_declarations([res, op1, op2])} let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size1}][{M2_size0}]; let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; for (let i: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index f711e78546..7a57e6e6b7 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -105,3 +105,54 @@ def pp_cell(cell: FCell): return f'{cell.declaration.name} = {cell.declaration.component.name};' elif cell.is_dahlia_declaration(): return f'{cell.dahlia_declaration.decl_name} = {cell.dahlia_declaration.component_name};' + + +# Dahlia Pretty Printing. + +def next_character(ch, dir=1): + """ + Returns the next character after 'ch'. + If dir is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. 
+ """ + return chr(ord(ch) + dir) if dir > 0 else chr(ord(ch) - 1) + + +def pp_dahlia_memory_declarations(declaration_list): + declarations = [] + for decl in declaration_list: + decl_string = f'decl {decl.name}: {decl.data_type}<{decl.data[0]}>' + for i in range(0, decl.type): decl_string += f'[{decl.data[i + 1]}]' + declarations.append(f'{decl_string};') + return '\n'.join(declarations) + + +def pp_dahlia_loop(data, body): + """ + Returns an iteration over data with `body` as the work done within the nested loop(s). + Many tensor functions share the same control flow: (1) Iterate over `data`, and (2) do some work in body. + For example, if `data` is a 2D primitive of size (M, N) and body == `X;`, then this will return: + + ``` + for (let i: ubit = 0..M) { + for (let j: ubit = 0..N) { + X; + } + } + ``` + """ + variable_name = chr(ord('i')) + num_dimensions = data.type + + program = [] + SPACING = '' + for i in range(0, num_dimensions): + size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] + program.append(f'{SPACING}for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') + variable_name = next_character(variable_name) + SPACING += ' ' + program.append(f'{SPACING}{body}') + + for i in range(0, num_dimensions): + SPACING = SPACING[:-2] + program.append(f'{SPACING}}}') + return '\n'.join(program) From 1b186732f4db39f295c2aec321aaca5d1ab361a4 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 24 Nov 2020 08:14:55 -0500 Subject: [PATCH 48/75] Fix comment. 
--- frontends/relay-futil/dahlia_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 6b0e7496a9..c9e5077c22 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -89,8 +89,10 @@ def broadcast(declaration): for i in reversed(range(0, res_dims)): res_sizes.append(res.data[i + 1]) op1_indices, op2_indices, res_indices = [], [], [] - # Get the character associated with 'i' + N, where N == number of dimensions in `op1`. + # Gets the last variable name since we will compare sizes in the reverse direction. variable_name = chr(ord(CHARACTER_I) + op1_dims - 1) + # Determine the value at the N'th index. This will either be `[x]` or `[0]` + # depending on the relationship between the dimensions sizes. for i in range(0, len(res_sizes)): current_dimension, index_zero = f'[{variable_name}]', '[0]' res_indices.append(current_dimension) From 01bfe7411694db81977253fe0151d84eb381aa87 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 24 Nov 2020 10:58:35 -0500 Subject: [PATCH 49/75] Generalize functions. --- frontends/relay-futil/dahlia_functions.py | 90 +++++++++++++------- frontends/relay-futil/tests/broadcast.expect | 31 +++---- frontends/relay-futil/tests/broadcast.relay | 2 +- 3 files changed, 73 insertions(+), 50 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index c9e5077c22..7ce89e0d44 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -76,10 +76,7 @@ def broadcast(declaration): result[i][j][k] := op1[i][0][k] + op2[j][0]; ... 
""" - operand1, operand2 = declaration.inputs[0].primitive, declaration.inputs[1].primitive - res = declaration.output.primitive - op1 = operand1 if operand1.type >= operand2.type else operand2 - op2 = operand2 if op1 == operand1 else operand1 + op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive op1_dims, op2_dims, res_dims = op1.type, op2.type, res.type op1_sizes, op2_sizes, res_sizes = [], [], [] @@ -88,18 +85,22 @@ def broadcast(declaration): for i in reversed(range(0, op2_dims)): op2_sizes.append(op2.data[i + 1]) for i in reversed(range(0, res_dims)): res_sizes.append(res.data[i + 1]) - op1_indices, op2_indices, res_indices = [], [], [] # Gets the last variable name since we will compare sizes in the reverse direction. - variable_name = chr(ord(CHARACTER_I) + op1_dims - 1) - # Determine the value at the N'th index. This will either be `[x]` or `[0]` + variable_name = chr(ord(CHARACTER_I) + res_dims - 1) + # Determine the value at the indices in reverse order. + # For each dimension, this will either be `[x]` for index_variable `x`, or `[0]` # depending on the relationship between the dimensions sizes. + op1_indices, op2_indices, res_indices = [], [], [] for i in range(0, len(res_sizes)): current_dimension, index_zero = f'[{variable_name}]', '[0]' res_indices.append(current_dimension) - if len(op2_sizes) <= i: + if op1_dims > op2_dims and len(op2_sizes) <= i: op1_indices.append(current_dimension) continue - elif op1_sizes[i] == op2_sizes[i]: + if op2_dims > op1_dims and len(op1_sizes) <= i: + op2_indices.append(current_dimension) + continue + if op1_sizes[i] == op2_sizes[i]: op1_indices.append(current_dimension) op2_indices.append(current_dimension) elif op1_sizes[i] > op2_sizes[i]: @@ -110,8 +111,9 @@ def broadcast(declaration): op2_indices.append(current_dimension) variable_name = next_character(variable_name, -1) - # Resulting index in the nested for loop, e.g. 
for op1[i][j][0][k], this is `[i][j][0][k]`. - op1_index, op2_index = ''.join(reversed(op1_indices)), ''.join(reversed(op2_indices)) + # Resulting index in the nested for loop, e.g. for `op1[i][j][0][k]`, this is `[i][j][0][k]`. + op1_index = ''.join(reversed(op1_indices)) + op2_index = ''.join(reversed(op2_indices)) res_index = ''.join(reversed(res_indices)) loop_body = f'{res.name}{res_index} := {op1.name}{op1_index} {declaration.op} {op2.name}{op2_index};' @@ -139,9 +141,9 @@ def batch_flatten(declaration): declarations = pp_dahlia_memory_declarations([data, res]) let_flattened = f'let {variable_name}: ubit<{res_index_size1}> = 0;' - body = (f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;") - loops = pp_dahlia_loop(data, body) - program = f"""{declarations}{NEWL}{let_flattened}{NEWL}{loops}""" + body = f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;" + program_body = pp_dahlia_loop(data, body) + program = f"""{declarations}{NEWL}{let_flattened}{NEWL}{program_body}""" return lower_dahlia_program(program, declaration.component_name) @@ -165,8 +167,8 @@ def bias_add(declaration): declarations = pp_dahlia_memory_declarations([data, bias, res]) body = (f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};") - loops = pp_dahlia_loop(data, body) - return lower_dahlia_program(f"""{declarations}{NEWL}{loops}""", declaration.component_name) + program_body = pp_dahlia_loop(data, body) + return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) # TODO(cgyurgyik): @@ -175,12 +177,12 @@ def bias_add(declaration): # 2. Without signed bit array support, this is also meaningless. 
def relu(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" - op1, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = op1.data[0], op1.type + data, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, num_dimensions = data.data[0], data.type assert res.data_type == 'ubit', f'{res.data_type} is not currently supported for ReLU.' - let_zero = f'let zero: {op1.data_type}<{bitwidth}> = 0;' - declarations = pp_dahlia_memory_declarations([op1, res]) + declarations = pp_dahlia_memory_declarations([data, res]) + let_zero = f'let zero: {data.data_type}<{bitwidth}> = 0;' indices = "" variable_name = CHARACTER_I @@ -189,33 +191,57 @@ def relu(declaration): indices += f'[{variable_name}]' variable_name = next_character(variable_name) - body = f"""if ({op1.name}{indices} > zero) {{ {res.name}{indices} := {op1.name}{indices}; }} + body = f"""if ({data.name}{indices} > zero) {{ {res.name}{indices} := {data.name}{indices}; }} else {{ {res.name}{indices} := 0; }}""" - loops = pp_dahlia_loop(op1, body) - return lower_dahlia_program(f"""{declarations}{NEWL}{let_zero}{NEWL}{loops}""", declaration.component_name) + program_body = pp_dahlia_loop(data, body) + return lower_dahlia_program(f"""{declarations}{NEWL}{let_zero}{NEWL}{program_body}""", declaration.component_name) # TODO(cgyurgyik): Similar to ReLU, this requires signed operands. 
def negative(declaration): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" - op1, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, size, index_size = op1.data[0], op1.data[1], op1.data[2] - program = f""" - {pp_dahlia_memory_declarations([res, op1])} - for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i] := -{op1.name}[i]; - }} - """ - return lower_dahlia_program(program, declaration.component_name) + op, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, num_dimensions = op.data[0], op.type + + indices = "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + indices += f'[{variable_name}]' + variable_name = next_character(variable_name) + + declarations = pp_dahlia_memory_declarations([op, res]) + program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := -{op.name}{indices};""") + return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) def expand_dims(declaration): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" axis, num_newaxis = declaration.attributes.get_int("axis"), declaration.attributes.get_int("num_newaxis") data, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, num_dimensions = data.data[0], data.type + + declarations = pp_dahlia_memory_declarations([data, res]) + + res_indices, data_indices = "", "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. 
+ index = f'[{variable_name}]' + res_indices += index + data_indices += index + if axis == i + 1: + for _ in range(0, num_newaxis): res_indices += '[0]' + variable_name = next_character(variable_name) + + program_body = pp_dahlia_loop(data, f'{res.name}{res_indices} := {data.name}{data_indices}') + program = f"""{declarations}{NEWL}{program_body}""" + return lower_dahlia_program(program, declaration.component_name) + bitwidth, size, index_size = data.data[0], data.data[1], data.data[2] size0, size1, size2 = res.data[1], res.data[2], res.data[3] index_size0, index_size1, index_size2 = res.data[4], res.data[5], res.data[6] + if axis == 1 and num_newaxis == 2: program = f""" {pp_dahlia_memory_declarations([res, data])} diff --git a/frontends/relay-futil/tests/broadcast.expect b/frontends/relay-futil/tests/broadcast.expect index 9527534871..84f5962b54 100644 --- a/frontends/relay-futil/tests/broadcast.expect +++ b/frontends/relay-futil/tests/broadcast.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_0_addr0: 1, x10_0_0_addr1: 2, x10_0_0_addr2: 2, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) { +component add(go: 1, clk: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_addr0: 2, x10_0_addr1: 2, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) 
{ cells { add0 = prim std_add(32); add1 = prim std_add(2); @@ -9,14 +9,13 @@ component add(go: 1, clk: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, x20_0_0_rea const0 = prim std_const(2, 0); const1 = prim std_const(2, 1); const10 = prim std_const(2, 1); - const11 = prim std_const(2, 1); const2 = prim std_const(2, 0); const3 = prim std_const(2, 1); const4 = prim std_const(2, 0); const5 = prim std_const(2, 1); const6 = prim std_const(1, 0); const7 = prim std_const(1, 0); - const8 = prim std_const(1, 0); + const8 = prim std_const(2, 1); const9 = prim std_const(2, 1); i0 = prim std_reg(2); j0 = prim std_reg(2); @@ -60,16 +59,15 @@ component add(go: 1, clk: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, x20_0_0_rea } group upd0<"static"=1> { x1_read0_0.write_en = 1'd1; - x10_0_0_addr2 = k0.out; - x10_0_0_addr1 = j0.out; - x10_0_0_addr0 = const6.out; - x1_read0_0.in = 1'd1 ? x10_0_0_read_data; + x10_0_addr1 = k0.out; + x10_0_addr0 = j0.out; + x1_read0_0.in = 1'd1 ? x10_0_read_data; upd0[done] = x1_read0_0.done ? 1'd1; } group upd1<"static"=1> { x2_read0_0.write_en = 1'd1; - x20_0_0_addr2 = const8.out; - x20_0_0_addr1 = const7.out; + x20_0_0_addr2 = const7.out; + x20_0_0_addr1 = const6.out; x20_0_0_addr0 = i0.out; x2_read0_0.in = 1'd1 ? x20_0_0_read_data; upd1[done] = x2_read0_0.done ? 1'd1; @@ -87,21 +85,21 @@ component add(go: 1, clk: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, x20_0_0_rea group upd3<"static"=1> { k0.write_en = 1'd1; add1.left = k0.out; - add1.right = const9.out; + add1.right = const8.out; k0.in = 1'd1 ? add1.out; upd3[done] = k0.done ? 1'd1; } group upd4<"static"=1> { j0.write_en = 1'd1; add2.left = j0.out; - add2.right = const10.out; + add2.right = const9.out; j0.in = 1'd1 ? add2.out; upd4[done] = j0.done ? 1'd1; } group upd5<"static"=1> { i0.write_en = 1'd1; add3.left = i0.out; - add3.right = const11.out; + add3.right = const10.out; i0.in = 1'd1 ? add3.out; upd5[done] = i0.done ? 
1'd1; } @@ -139,16 +137,15 @@ component add(go: 1, clk: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, x20_0_0_rea component main () -> () { cells { x3 = prim std_mem_d3(32, 2, 2, 2, 2, 2, 2); - x1 = prim std_mem_d3(32, 1, 2, 2, 1, 2, 2); + x1 = prim std_mem_d2(32, 2, 2, 2, 2); x2 = prim std_mem_d3(32, 2, 1, 1, 2, 1, 1); add0 = add; } wires { group run_add { - x1.addr0 = add0.x10_0_0_addr0; - add0.x10_0_0_read_data = x1.read_data; - x1.addr1 = add0.x10_0_0_addr1; - x1.addr2 = add0.x10_0_0_addr2; + x1.addr0 = add0.x10_0_addr0; + add0.x10_0_read_data = x1.read_data; + x1.addr1 = add0.x10_0_addr1; x2.addr0 = add0.x20_0_0_addr0; add0.x20_0_0_read_data = x2.read_data; x2.addr1 = add0.x20_0_0_addr1; diff --git a/frontends/relay-futil/tests/broadcast.relay b/frontends/relay-futil/tests/broadcast.relay index bacd708118..9dfdf5d721 100644 --- a/frontends/relay-futil/tests/broadcast.relay +++ b/frontends/relay-futil/tests/broadcast.relay @@ -1,5 +1,5 @@ v0.0.4 -fn (%x1: Tensor[(1, 2, 2), int32], %x2: Tensor[(2, 1, 1), int32]) { +fn (%x1: Tensor[(2, 2), int32], %x2: Tensor[(2, 1, 1), int32]) { let %x3 = add(%x1, %x2); %x3 } From 9f3a75661f5ad7fbb4b11076155a5ac3e7ce9378 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 24 Nov 2020 11:44:49 -0500 Subject: [PATCH 50/75] Fix batch_matmul. --- frontends/relay-futil/dahlia_functions.py | 25 ++--------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 7ce89e0d44..084524618b 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -48,14 +48,6 @@ def lower_dahlia_program(prog, component_name): return component -def next_character(ch, dir=1): - """ - Returns the next character after 'ch'. - If dir is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. 
- """ - return chr(ord(ch) + dir) if dir > 0 else chr(ord(ch) - 1) - - def broadcast(declaration): """ https://numpy.org/doc/stable/user/basics.broadcasting.html @@ -238,19 +230,6 @@ def expand_dims(declaration): program = f"""{declarations}{NEWL}{program_body}""" return lower_dahlia_program(program, declaration.component_name) - bitwidth, size, index_size = data.data[0], data.data[1], data.data[2] - size0, size1, size2 = res.data[1], res.data[2], res.data[3] - index_size0, index_size1, index_size2 = res.data[4], res.data[5], res.data[6] - - if axis == 1 and num_newaxis == 2: - program = f""" - {pp_dahlia_memory_declarations([res, data])} - for (let i: ubit<{index_size}> = 0..{size}) {{ - {res.name}[i][0][0] := {data.name}[i]; - }} - """ - return lower_dahlia_program(program, declaration.component_name) - def batch_matmul(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" @@ -264,8 +243,8 @@ def batch_matmul(declaration): # 3. Copy temporary value to return value.* # * This third step may not be necessary, but trying to conduct the matrix multiply # directly with the return value declared resulted in incorrect outputs. - program = f""" - {pp_dahlia_memory_declarations([res, op1, op2])} + declarations = pp_dahlia_memory_declarations([res, op1, op2]) + program = f"""{declarations} let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size2}][{M2_size1}]; let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ From 05a3935eaeba00ea16c0517ddbe72bc441b82b15 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 24 Nov 2020 14:02:51 -0500 Subject: [PATCH 51/75] Add mlp_net (incomplete example). 
--- frontends/relay-futil/tests/mlp_net.expect | 1781 ++++++++++++++++++++ frontends/relay-futil/tests/mlp_net.relay | 17 + 2 files changed, 1798 insertions(+) create mode 100644 frontends/relay-futil/tests/mlp_net.expect create mode 100644 frontends/relay-futil/tests/mlp_net.relay diff --git a/frontends/relay-futil/tests/mlp_net.expect b/frontends/relay-futil/tests/mlp_net.expect new file mode 100644 index 0000000000..ea30df19af --- /dev/null +++ b/frontends/relay-futil/tests/mlp_net.expect @@ -0,0 +1,1781 @@ +import "primitives/std.lib"; + +component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x70_0_read_data: 32, x70_0_done: 1, x80_0_read_data: 32, x80_0_done: 1) -> (done: 1, fc3_bias0_addr0: 4, fc3_bias0_write_data: 32, fc3_bias0_write_en: 1, fc3_bias0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(4); + add2 = prim std_add(1); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(4, 0); + const3 = prim std_const(4, 9); + const4 = prim std_const(4, 1); + const5 = prim std_const(1, 1); + fc3_bias_read0_0 = prim std_reg(32); + i0 = prim std_reg(1); + j0 = prim std_reg(4); + le0 = prim std_le(1); + le1 = prim std_le(4); + x7_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group upd0<"static"=1> { + x7_read0_0.write_en = 1'd1; + x70_0_addr1 = j0.out; + x70_0_addr0 = i0.out; + x7_read0_0.in = 1'd1 ? 
x70_0_read_data; + upd0[done] = x7_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + fc3_bias_read0_0.write_en = 1'd1; + fc3_bias0_addr0 = j0.out; + fc3_bias_read0_0.in = 1'd1 ? fc3_bias0_read_data; + upd1[done] = fc3_bias_read0_0.done ? 1'd1; + } + group upd2<"static"=1> { + x80_0_addr1 = j0.out; + x80_0_addr0 = i0.out; + x80_0_write_en = 1'd1; + add0.left = x7_read0_0.out; + add0.right = fc3_bias_read0_0.out; + x80_0_write_data = 1'd1 ? add0.out; + upd2[done] = x80_0_done ? 1'd1; + } + group upd3<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd3[done] = j0.done ? 1'd1; + } + group upd4<"static"=1> { + i0.write_en = 1'd1; + add2.left = i0.out; + add2.right = const5.out; + i0.in = 1'd1 ? add2.out; + upd4[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } + } + } + } +} +component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: 1, x60_0_read_data: 32, x60_0_done: 1, x70_0_read_data: 32, x70_0_done: 1) -> (done: 1, fc3_weight0_0_addr0: 4, fc3_weight0_0_addr1: 7, fc3_weight0_0_write_data: 32, fc3_weight0_0_write_en: 1, fc3_weight0_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1) { + cells { + add0 = prim std_add(7); + add1 = prim std_add(4); + add2 = prim std_add(32); + add3 = prim std_add(7); + add4 = prim std_add(4); + add5 = prim std_add(1); + add6 = prim std_add(4); + add7 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(4, 0); + const1 = prim std_const(4, 9); + const10 = prim std_const(7, 0); + const11 = prim std_const(7, 63); + const12 = prim std_const(7, 1); + const13 = prim std_const(4, 1); + const14 = prim std_const(1, 1); + const15 = prim 
std_const(1, 0); + const16 = prim std_const(1, 0); + const17 = prim std_const(4, 0); + const18 = prim std_const(4, 9); + const19 = prim std_const(4, 1); + const2 = prim std_const(7, 0); + const20 = prim std_const(1, 1); + const3 = prim std_const(7, 63); + const4 = prim std_const(7, 1); + const5 = prim std_const(4, 1); + const6 = prim std_const(1, 0); + const7 = prim std_const(1, 0); + const8 = prim std_const(4, 0); + const9 = prim std_const(4, 9); + fc3_weight_read0_0 = prim std_reg(32); + i0 = prim std_reg(4); + i1 = prim std_reg(1); + i2 = prim std_reg(1); + j0 = prim std_reg(7); + j1 = prim std_reg(4); + j2 = prim std_reg(4); + k0 = prim std_reg(7); + le0 = prim std_le(4); + le1 = prim std_le(7); + le2 = prim std_le(1); + le3 = prim std_le(4); + le4 = prim std_le(7); + le5 = prim std_le(1); + le6 = prim std_le(4); + mult_pipe0 = prim std_mult_pipe(32); + product_0 = prim std_reg(32); + temporary_x70_0 = prim std_mem_d2(32, 1, 10, 1, 4); + temporary_x7_read0_0 = prim std_reg(32); + transpose_fc3_weight0_0 = prim std_mem_d2(32, 64, 10, 7, 4); + transpose_fc3_weight_read0_0 = prim std_reg(32); + x6_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = i1.out; + le2.right = const7.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = j1.out; + le3.right = const9.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = k0.out; + le4.right = const11.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = i2.out; + le5.right = const16.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = j2.out; + le6.right = const18.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + 
j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + i1.in = const6.out; + i1.write_en = 1'd1; + let2[done] = i1.done; + } + group let3<"static"=1> { + j1.in = const8.out; + j1.write_en = 1'd1; + let3[done] = j1.done; + } + group let4<"static"=1> { + k0.in = const10.out; + k0.write_en = 1'd1; + let4[done] = k0.done; + } + group let5<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let5[done] = bin_read0_0.done; + mult_pipe0.left = x6_read0_0.out; + mult_pipe0.right = transpose_fc3_weight_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let6<"static"=1> { + product_0.in = bin_read0_0.out; + product_0.write_en = 1'd1; + let6[done] = product_0.done; + } + group let7<"static"=1> { + i2.in = const15.out; + i2.write_en = 1'd1; + let7[done] = i2.done; + } + group let8<"static"=1> { + j2.in = const17.out; + j2.write_en = 1'd1; + let8[done] = j2.done; + } + group upd0<"static"=1> { + fc3_weight_read0_0.write_en = 1'd1; + fc3_weight0_0_addr1 = j0.out; + fc3_weight0_0_addr0 = i0.out; + fc3_weight_read0_0.in = 1'd1 ? fc3_weight0_0_read_data; + upd0[done] = fc3_weight_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + transpose_fc3_weight0_0.addr1 = i0.out; + transpose_fc3_weight0_0.addr0 = j0.out; + transpose_fc3_weight0_0.write_en = 1'd1; + transpose_fc3_weight0_0.write_data = 1'd1 ? fc3_weight_read0_0.out; + upd1[done] = transpose_fc3_weight0_0.done ? 1'd1; + } + group upd10<"static"=1> { + temporary_x7_read0_0.write_en = 1'd1; + temporary_x70_0.addr1 = j2.out; + temporary_x70_0.addr0 = i2.out; + temporary_x7_read0_0.in = 1'd1 ? temporary_x70_0.read_data; + upd10[done] = temporary_x7_read0_0.done ? 1'd1; + } + group upd11<"static"=1> { + x70_0_addr1 = j2.out; + x70_0_addr0 = i2.out; + x70_0_write_en = 1'd1; + x70_0_write_data = 1'd1 ? temporary_x7_read0_0.out; + upd11[done] = x70_0_done ? 
1'd1; + } + group upd12<"static"=1> { + j2.write_en = 1'd1; + add6.left = j2.out; + add6.right = const19.out; + j2.in = 1'd1 ? add6.out; + upd12[done] = j2.done ? 1'd1; + } + group upd13<"static"=1> { + i2.write_en = 1'd1; + add7.left = i2.out; + add7.right = const20.out; + i2.in = 1'd1 ? add7.out; + upd13[done] = i2.done ? 1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const4.out; + j0.in = 1'd1 ? add0.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const5.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; + } + group upd4<"static"=1> { + x6_read0_0.write_en = 1'd1; + x60_0_addr1 = k0.out; + x60_0_addr0 = i1.out; + x6_read0_0.in = 1'd1 ? x60_0_read_data; + upd4[done] = x6_read0_0.done ? 1'd1; + } + group upd5<"static"=1> { + transpose_fc3_weight_read0_0.write_en = 1'd1; + transpose_fc3_weight0_0.addr1 = j1.out; + transpose_fc3_weight0_0.addr0 = k0.out; + transpose_fc3_weight_read0_0.in = 1'd1 ? transpose_fc3_weight0_0.read_data; + upd5[done] = transpose_fc3_weight_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + temporary_x70_0.addr1 = j1.out; + temporary_x70_0.addr0 = i1.out; + temporary_x70_0.write_en = 1'd1; + add2.left = temporary_x70_0.read_data; + add2.right = product_0.out; + temporary_x70_0.addr1 = j1.out; + temporary_x70_0.addr0 = i1.out; + temporary_x70_0.write_data = 1'd1 ? add2.out; + upd6[done] = temporary_x70_0.done ? 1'd1; + } + group upd7<"static"=1> { + k0.write_en = 1'd1; + add3.left = k0.out; + add3.right = const12.out; + k0.in = 1'd1 ? add3.out; + upd7[done] = k0.done ? 1'd1; + } + group upd8<"static"=1> { + j1.write_en = 1'd1; + add4.left = j1.out; + add4.right = const13.out; + j1.in = 1'd1 ? add4.out; + upd8[done] = j1.done ? 1'd1; + } + group upd9<"static"=1> { + i1.write_en = 1'd1; + add5.left = i1.out; + add5.right = const14.out; + i1.in = 1'd1 ? add5.out; + upd9[done] = i1.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + upd3; + } + } + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + let4; + while le4.out with cond4 { + seq { + par { + upd4; + upd5; + } + let5; + let6; + upd6; + upd7; + } + } + upd8; + } + } + upd9; + } + } + let7; + while le5.out with cond5 { + seq { + let8; + while le6.out with cond6 { + seq { + upd10; + upd11; + upd12; + } + } + upd13; + } + } + } + } +} +component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_data: 32, x60_0_done: 1) -> (done: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1) { + cells { + add0 = prim std_add(7); + add1 = prim std_add(1); + const0 = prim std_const(32, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 0); + const3 = prim std_const(7, 0); + const4 = prim std_const(7, 63); + const5 = prim std_const(32, 0); + const6 = prim std_const(7, 1); + const7 = prim std_const(1, 1); + gt0 = prim std_gt(32); + i0 = prim std_reg(1); + j0 = prim std_reg(7); + le0 = prim std_le(1); + le1 = prim std_le(7); + x5_read0_0 = prim std_reg(32); + x5_read1_0 = prim std_reg(32); + zero_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const2.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const4.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + gt0.left = x5_read0_0.out; + gt0.right = zero_0.out; + } + group let0<"static"=1> { + zero_0.in = const0.out; + zero_0.write_en = 1'd1; + let0[done] = zero_0.done; + } + group let1<"static"=1> { + i0.in = const1.out; + i0.write_en = 1'd1; + let1[done] = i0.done; + } + group let2<"static"=1> { + j0.in = const3.out; + j0.write_en = 
1'd1; + let2[done] = j0.done; + } + group upd0<"static"=1> { + x5_read0_0.write_en = 1'd1; + x50_0_addr1 = j0.out; + x50_0_addr0 = i0.out; + x5_read0_0.in = 1'd1 ? x50_0_read_data; + upd0[done] = x5_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + x5_read1_0.write_en = 1'd1; + x50_0_addr1 = j0.out; + x50_0_addr0 = i0.out; + x5_read1_0.in = 1'd1 ? x50_0_read_data; + upd1[done] = x5_read1_0.done ? 1'd1; + } + group upd2<"static"=1> { + x60_0_addr1 = j0.out; + x60_0_addr0 = i0.out; + x60_0_write_en = 1'd1; + x60_0_write_data = 1'd1 ? x5_read1_0.out; + upd2[done] = x60_0_done ? 1'd1; + } + group upd3<"static"=1> { + x60_0_addr1 = j0.out; + x60_0_addr0 = i0.out; + x60_0_write_en = 1'd1; + x60_0_write_data = 1'd1 ? const5.out; + upd3[done] = x60_0_done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const6.out; + j0.in = 1'd1 ? add0.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const7.out; + i0.in = 1'd1 ? add1.out; + upd5[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + let1; + while le0.out with cond0 { + seq { + let2; + while le1.out with cond1 { + seq { + upd0; + if gt0.out with cond2 { + seq { + upd1; + upd2; + } + } else { + upd3; + } + upd4; + } + } + upd5; + } + } + } + } +} +component bias_add1(go: 1, clk: 1, fc2_bias0_read_data: 32, fc2_bias0_done: 1, x40_0_read_data: 32, x40_0_done: 1, x50_0_read_data: 32, x50_0_done: 1) -> (done: 1, fc2_bias0_addr0: 7, fc2_bias0_write_data: 32, fc2_bias0_write_en: 1, fc2_bias0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(7); + add2 = prim std_add(1); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(7, 0); + const3 = prim std_const(7, 63); + const4 = prim std_const(7, 1); + const5 = prim std_const(1, 1); + fc2_bias_read0_0 = prim std_reg(32); + i0 = prim std_reg(1); + j0 = prim std_reg(7); + le0 = prim std_le(1); + le1 = prim std_le(7); + x4_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group upd0<"static"=1> { + x4_read0_0.write_en = 1'd1; + x40_0_addr1 = j0.out; + x40_0_addr0 = i0.out; + x4_read0_0.in = 1'd1 ? x40_0_read_data; + upd0[done] = x4_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + fc2_bias_read0_0.write_en = 1'd1; + fc2_bias0_addr0 = j0.out; + fc2_bias_read0_0.in = 1'd1 ? fc2_bias0_read_data; + upd1[done] = fc2_bias_read0_0.done ? 
1'd1; + } + group upd2<"static"=1> { + x50_0_addr1 = j0.out; + x50_0_addr0 = i0.out; + x50_0_write_en = 1'd1; + add0.left = x4_read0_0.out; + add0.right = fc2_bias_read0_0.out; + x50_0_write_data = 1'd1 ? add0.out; + upd2[done] = x50_0_done ? 1'd1; + } + group upd3<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd3[done] = j0.done ? 1'd1; + } + group upd4<"static"=1> { + i0.write_en = 1'd1; + add2.left = i0.out; + add2.right = const5.out; + i0.in = 1'd1 ? add2.out; + upd4[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } + } + } + } +} +component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: 1, x30_0_read_data: 32, x30_0_done: 1, x40_0_read_data: 32, x40_0_done: 1) -> (done: 1, fc2_weight0_0_addr0: 7, fc2_weight0_0_addr1: 8, fc2_weight0_0_write_data: 32, fc2_weight0_0_write_en: 1, fc2_weight0_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1) { + cells { + add0 = prim std_add(8); + add1 = prim std_add(7); + add2 = prim std_add(32); + add3 = prim std_add(8); + add4 = prim std_add(7); + add5 = prim std_add(1); + add6 = prim std_add(7); + add7 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(7, 0); + const1 = prim std_const(7, 63); + const10 = prim std_const(8, 0); + const11 = prim std_const(8, 127); + const12 = prim std_const(8, 1); + const13 = prim std_const(7, 1); + const14 = prim std_const(1, 1); + const15 = prim std_const(1, 0); + const16 = prim std_const(1, 0); + const17 = prim std_const(7, 0); + const18 = prim std_const(7, 63); + const19 = prim std_const(7, 1); + const2 = prim std_const(8, 0); + const20 = prim std_const(1, 1); + const3 = prim std_const(8, 
127); + const4 = prim std_const(8, 1); + const5 = prim std_const(7, 1); + const6 = prim std_const(1, 0); + const7 = prim std_const(1, 0); + const8 = prim std_const(7, 0); + const9 = prim std_const(7, 63); + fc2_weight_read0_0 = prim std_reg(32); + i0 = prim std_reg(7); + i1 = prim std_reg(1); + i2 = prim std_reg(1); + j0 = prim std_reg(8); + j1 = prim std_reg(7); + j2 = prim std_reg(7); + k0 = prim std_reg(8); + le0 = prim std_le(7); + le1 = prim std_le(8); + le2 = prim std_le(1); + le3 = prim std_le(7); + le4 = prim std_le(8); + le5 = prim std_le(1); + le6 = prim std_le(7); + mult_pipe0 = prim std_mult_pipe(32); + product_0 = prim std_reg(32); + temporary_x40_0 = prim std_mem_d2(32, 1, 64, 1, 7); + temporary_x4_read0_0 = prim std_reg(32); + transpose_fc2_weight0_0 = prim std_mem_d2(32, 128, 64, 8, 7); + transpose_fc2_weight_read0_0 = prim std_reg(32); + x3_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = i1.out; + le2.right = const7.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = j1.out; + le3.right = const9.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = k0.out; + le4.right = const11.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = i2.out; + le5.right = const16.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = j2.out; + le6.right = const18.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + i1.in = const6.out; + i1.write_en = 1'd1; + let2[done] = i1.done; + } + group let3<"static"=1> { + j1.in = const8.out; + j1.write_en = 1'd1; + 
let3[done] = j1.done; + } + group let4<"static"=1> { + k0.in = const10.out; + k0.write_en = 1'd1; + let4[done] = k0.done; + } + group let5<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let5[done] = bin_read0_0.done; + mult_pipe0.left = x3_read0_0.out; + mult_pipe0.right = transpose_fc2_weight_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let6<"static"=1> { + product_0.in = bin_read0_0.out; + product_0.write_en = 1'd1; + let6[done] = product_0.done; + } + group let7<"static"=1> { + i2.in = const15.out; + i2.write_en = 1'd1; + let7[done] = i2.done; + } + group let8<"static"=1> { + j2.in = const17.out; + j2.write_en = 1'd1; + let8[done] = j2.done; + } + group upd0<"static"=1> { + fc2_weight_read0_0.write_en = 1'd1; + fc2_weight0_0_addr1 = j0.out; + fc2_weight0_0_addr0 = i0.out; + fc2_weight_read0_0.in = 1'd1 ? fc2_weight0_0_read_data; + upd0[done] = fc2_weight_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + transpose_fc2_weight0_0.addr1 = i0.out; + transpose_fc2_weight0_0.addr0 = j0.out; + transpose_fc2_weight0_0.write_en = 1'd1; + transpose_fc2_weight0_0.write_data = 1'd1 ? fc2_weight_read0_0.out; + upd1[done] = transpose_fc2_weight0_0.done ? 1'd1; + } + group upd10<"static"=1> { + temporary_x4_read0_0.write_en = 1'd1; + temporary_x40_0.addr1 = j2.out; + temporary_x40_0.addr0 = i2.out; + temporary_x4_read0_0.in = 1'd1 ? temporary_x40_0.read_data; + upd10[done] = temporary_x4_read0_0.done ? 1'd1; + } + group upd11<"static"=1> { + x40_0_addr1 = j2.out; + x40_0_addr0 = i2.out; + x40_0_write_en = 1'd1; + x40_0_write_data = 1'd1 ? temporary_x4_read0_0.out; + upd11[done] = x40_0_done ? 1'd1; + } + group upd12<"static"=1> { + j2.write_en = 1'd1; + add6.left = j2.out; + add6.right = const19.out; + j2.in = 1'd1 ? add6.out; + upd12[done] = j2.done ? 1'd1; + } + group upd13<"static"=1> { + i2.write_en = 1'd1; + add7.left = i2.out; + add7.right = const20.out; + i2.in = 1'd1 ? 
add7.out; + upd13[done] = i2.done ? 1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const4.out; + j0.in = 1'd1 ? add0.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const5.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; + } + group upd4<"static"=1> { + x3_read0_0.write_en = 1'd1; + x30_0_addr1 = k0.out; + x30_0_addr0 = i1.out; + x3_read0_0.in = 1'd1 ? x30_0_read_data; + upd4[done] = x3_read0_0.done ? 1'd1; + } + group upd5<"static"=1> { + transpose_fc2_weight_read0_0.write_en = 1'd1; + transpose_fc2_weight0_0.addr1 = j1.out; + transpose_fc2_weight0_0.addr0 = k0.out; + transpose_fc2_weight_read0_0.in = 1'd1 ? transpose_fc2_weight0_0.read_data; + upd5[done] = transpose_fc2_weight_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + temporary_x40_0.addr1 = j1.out; + temporary_x40_0.addr0 = i1.out; + temporary_x40_0.write_en = 1'd1; + add2.left = temporary_x40_0.read_data; + add2.right = product_0.out; + temporary_x40_0.addr1 = j1.out; + temporary_x40_0.addr0 = i1.out; + temporary_x40_0.write_data = 1'd1 ? add2.out; + upd6[done] = temporary_x40_0.done ? 1'd1; + } + group upd7<"static"=1> { + k0.write_en = 1'd1; + add3.left = k0.out; + add3.right = const12.out; + k0.in = 1'd1 ? add3.out; + upd7[done] = k0.done ? 1'd1; + } + group upd8<"static"=1> { + j1.write_en = 1'd1; + add4.left = j1.out; + add4.right = const13.out; + j1.in = 1'd1 ? add4.out; + upd8[done] = j1.done ? 1'd1; + } + group upd9<"static"=1> { + i1.write_en = 1'd1; + add5.left = i1.out; + add5.right = const14.out; + i1.in = 1'd1 ? add5.out; + upd9[done] = i1.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + upd3; + } + } + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + let4; + while le4.out with cond4 { + seq { + par { + upd4; + upd5; + } + let5; + let6; + upd6; + upd7; + } + } + upd8; + } + } + upd9; + } + } + let7; + while le5.out with cond5 { + seq { + let8; + while le6.out with cond6 { + seq { + upd10; + upd11; + upd12; + } + } + upd13; + } + } + } + } +} +component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_data: 32, x30_0_done: 1) -> (done: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1) { + cells { + add0 = prim std_add(8); + add1 = prim std_add(1); + const0 = prim std_const(32, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(1, 0); + const3 = prim std_const(8, 0); + const4 = prim std_const(8, 127); + const5 = prim std_const(32, 0); + const6 = prim std_const(8, 1); + const7 = prim std_const(1, 1); + gt0 = prim std_gt(32); + i0 = prim std_reg(1); + j0 = prim std_reg(8); + le0 = prim std_le(1); + le1 = prim std_le(8); + x2_read0_0 = prim std_reg(32); + x2_read1_0 = prim std_reg(32); + zero_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const2.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const4.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + gt0.left = x2_read0_0.out; + gt0.right = zero_0.out; + } + group let0<"static"=1> { + zero_0.in = const0.out; + zero_0.write_en = 1'd1; + let0[done] = zero_0.done; + } + group let1<"static"=1> { + i0.in = const1.out; + i0.write_en = 1'd1; + let1[done] = i0.done; + } + group let2<"static"=1> { + j0.in = const3.out; + j0.write_en = 
1'd1; + let2[done] = j0.done; + } + group upd0<"static"=1> { + x2_read0_0.write_en = 1'd1; + x20_0_addr1 = j0.out; + x20_0_addr0 = i0.out; + x2_read0_0.in = 1'd1 ? x20_0_read_data; + upd0[done] = x2_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + x2_read1_0.write_en = 1'd1; + x20_0_addr1 = j0.out; + x20_0_addr0 = i0.out; + x2_read1_0.in = 1'd1 ? x20_0_read_data; + upd1[done] = x2_read1_0.done ? 1'd1; + } + group upd2<"static"=1> { + x30_0_addr1 = j0.out; + x30_0_addr0 = i0.out; + x30_0_write_en = 1'd1; + x30_0_write_data = 1'd1 ? x2_read1_0.out; + upd2[done] = x30_0_done ? 1'd1; + } + group upd3<"static"=1> { + x30_0_addr1 = j0.out; + x30_0_addr0 = i0.out; + x30_0_write_en = 1'd1; + x30_0_write_data = 1'd1 ? const5.out; + upd3[done] = x30_0_done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const6.out; + j0.in = 1'd1 ? add0.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const7.out; + i0.in = 1'd1 ? add1.out; + upd5[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + let1; + while le0.out with cond0 { + seq { + let2; + while le1.out with cond1 { + seq { + upd0; + if gt0.out with cond2 { + seq { + upd1; + upd2; + } + } else { + upd3; + } + upd4; + } + } + upd5; + } + } + } + } +} +component bias_add(go: 1, clk: 1, fc1_bias0_read_data: 32, fc1_bias0_done: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_read_data: 32, x20_0_done: 1) -> (done: 1, fc1_bias0_addr0: 8, fc1_bias0_write_data: 32, fc1_bias0_write_en: 1, fc1_bias0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(8); + add2 = prim std_add(1); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(8, 0); + const3 = prim std_const(8, 127); + const4 = prim std_const(8, 1); + const5 = prim std_const(1, 1); + fc1_bias_read0_0 = prim std_reg(32); + i0 = prim std_reg(1); + j0 = prim std_reg(8); + le0 = prim std_le(1); + le1 = prim std_le(8); + x1_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group upd0<"static"=1> { + x1_read0_0.write_en = 1'd1; + x10_0_addr1 = j0.out; + x10_0_addr0 = i0.out; + x1_read0_0.in = 1'd1 ? x10_0_read_data; + upd0[done] = x1_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + fc1_bias_read0_0.write_en = 1'd1; + fc1_bias0_addr0 = j0.out; + fc1_bias_read0_0.in = 1'd1 ? fc1_bias0_read_data; + upd1[done] = fc1_bias_read0_0.done ? 
1'd1; + } + group upd2<"static"=1> { + x20_0_addr1 = j0.out; + x20_0_addr0 = i0.out; + x20_0_write_en = 1'd1; + add0.left = x1_read0_0.out; + add0.right = fc1_bias_read0_0.out; + x20_0_write_data = 1'd1 ? add0.out; + upd2[done] = x20_0_done ? 1'd1; + } + group upd3<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd3[done] = j0.done ? 1'd1; + } + group upd4<"static"=1> { + i0.write_en = 1'd1; + add2.left = i0.out; + add2.right = const5.out; + i0.in = 1'd1 ? add2.out; + upd4[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + par { + upd0; + upd1; + } + upd2; + upd3; + } + } + upd4; + } + } + } + } +} +component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, fc1_weight0_0_addr0: 8, fc1_weight0_0_addr1: 10, fc1_weight0_0_write_data: 32, fc1_weight0_0_write_en: 1, fc1_weight0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { + cells { + add0 = prim std_add(10); + add1 = prim std_add(8); + add2 = prim std_add(32); + add3 = prim std_add(10); + add4 = prim std_add(8); + add5 = prim std_add(1); + add6 = prim std_add(8); + add7 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(8, 0); + const1 = prim std_const(8, 127); + const10 = prim std_const(10, 0); + const11 = prim std_const(10, 783); + const12 = prim std_const(10, 1); + const13 = prim std_const(8, 1); + const14 = prim std_const(1, 1); + const15 = prim std_const(1, 0); + const16 = prim std_const(1, 0); + const17 = prim std_const(8, 0); + const18 = prim std_const(8, 127); + const19 = prim std_const(8, 1); + const2 = prim std_const(10, 0); + const20 = prim std_const(1, 1); + const3 = prim 
std_const(10, 783); + const4 = prim std_const(10, 1); + const5 = prim std_const(8, 1); + const6 = prim std_const(1, 0); + const7 = prim std_const(1, 0); + const8 = prim std_const(8, 0); + const9 = prim std_const(8, 127); + fc1_weight_read0_0 = prim std_reg(32); + i0 = prim std_reg(8); + i1 = prim std_reg(1); + i2 = prim std_reg(1); + j0 = prim std_reg(10); + j1 = prim std_reg(8); + j2 = prim std_reg(8); + k0 = prim std_reg(10); + le0 = prim std_le(8); + le1 = prim std_le(10); + le2 = prim std_le(1); + le3 = prim std_le(8); + le4 = prim std_le(10); + le5 = prim std_le(1); + le6 = prim std_le(8); + mult_pipe0 = prim std_mult_pipe(32); + product_0 = prim std_reg(32); + temporary_x10_0 = prim std_mem_d2(32, 1, 128, 1, 8); + temporary_x1_read0_0 = prim std_reg(32); + transpose_fc1_weight0_0 = prim std_mem_d2(32, 784, 128, 10, 8); + transpose_fc1_weight_read0_0 = prim std_reg(32); + x_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = i1.out; + le2.right = const7.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = j1.out; + le3.right = const9.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = k0.out; + le4.right = const11.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = i2.out; + le5.right = const16.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = j2.out; + le6.right = const18.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + i1.in = const6.out; + i1.write_en = 1'd1; + let2[done] = i1.done; + } + group let3<"static"=1> { + j1.in = const8.out; + 
j1.write_en = 1'd1; + let3[done] = j1.done; + } + group let4<"static"=1> { + k0.in = const10.out; + k0.write_en = 1'd1; + let4[done] = k0.done; + } + group let5<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let5[done] = bin_read0_0.done; + mult_pipe0.left = x_read0_0.out; + mult_pipe0.right = transpose_fc1_weight_read0_0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let6<"static"=1> { + product_0.in = bin_read0_0.out; + product_0.write_en = 1'd1; + let6[done] = product_0.done; + } + group let7<"static"=1> { + i2.in = const15.out; + i2.write_en = 1'd1; + let7[done] = i2.done; + } + group let8<"static"=1> { + j2.in = const17.out; + j2.write_en = 1'd1; + let8[done] = j2.done; + } + group upd0<"static"=1> { + fc1_weight_read0_0.write_en = 1'd1; + fc1_weight0_0_addr1 = j0.out; + fc1_weight0_0_addr0 = i0.out; + fc1_weight_read0_0.in = 1'd1 ? fc1_weight0_0_read_data; + upd0[done] = fc1_weight_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + transpose_fc1_weight0_0.addr1 = i0.out; + transpose_fc1_weight0_0.addr0 = j0.out; + transpose_fc1_weight0_0.write_en = 1'd1; + transpose_fc1_weight0_0.write_data = 1'd1 ? fc1_weight_read0_0.out; + upd1[done] = transpose_fc1_weight0_0.done ? 1'd1; + } + group upd10<"static"=1> { + temporary_x1_read0_0.write_en = 1'd1; + temporary_x10_0.addr1 = j2.out; + temporary_x10_0.addr0 = i2.out; + temporary_x1_read0_0.in = 1'd1 ? temporary_x10_0.read_data; + upd10[done] = temporary_x1_read0_0.done ? 1'd1; + } + group upd11<"static"=1> { + x10_0_addr1 = j2.out; + x10_0_addr0 = i2.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? temporary_x1_read0_0.out; + upd11[done] = x10_0_done ? 1'd1; + } + group upd12<"static"=1> { + j2.write_en = 1'd1; + add6.left = j2.out; + add6.right = const19.out; + j2.in = 1'd1 ? add6.out; + upd12[done] = j2.done ? 1'd1; + } + group upd13<"static"=1> { + i2.write_en = 1'd1; + add7.left = i2.out; + add7.right = const20.out; + i2.in = 1'd1 ? 
add7.out; + upd13[done] = i2.done ? 1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add0.left = j0.out; + add0.right = const4.out; + j0.in = 1'd1 ? add0.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + i0.write_en = 1'd1; + add1.left = i0.out; + add1.right = const5.out; + i0.in = 1'd1 ? add1.out; + upd3[done] = i0.done ? 1'd1; + } + group upd4<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = k0.out; + x0_0_addr0 = i1.out; + x_read0_0.in = 1'd1 ? x0_0_read_data; + upd4[done] = x_read0_0.done ? 1'd1; + } + group upd5<"static"=1> { + transpose_fc1_weight_read0_0.write_en = 1'd1; + transpose_fc1_weight0_0.addr1 = j1.out; + transpose_fc1_weight0_0.addr0 = k0.out; + transpose_fc1_weight_read0_0.in = 1'd1 ? transpose_fc1_weight0_0.read_data; + upd5[done] = transpose_fc1_weight_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + temporary_x10_0.addr1 = j1.out; + temporary_x10_0.addr0 = i1.out; + temporary_x10_0.write_en = 1'd1; + add2.left = temporary_x10_0.read_data; + add2.right = product_0.out; + temporary_x10_0.addr1 = j1.out; + temporary_x10_0.addr0 = i1.out; + temporary_x10_0.write_data = 1'd1 ? add2.out; + upd6[done] = temporary_x10_0.done ? 1'd1; + } + group upd7<"static"=1> { + k0.write_en = 1'd1; + add3.left = k0.out; + add3.right = const12.out; + k0.in = 1'd1 ? add3.out; + upd7[done] = k0.done ? 1'd1; + } + group upd8<"static"=1> { + j1.write_en = 1'd1; + add4.left = j1.out; + add4.right = const13.out; + j1.in = 1'd1 ? add4.out; + upd8[done] = j1.done ? 1'd1; + } + group upd9<"static"=1> { + i1.write_en = 1'd1; + add5.left = i1.out; + add5.right = const14.out; + i1.in = 1'd1 ? add5.out; + upd9[done] = i1.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + upd3; + } + } + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + let4; + while le4.out with cond4 { + seq { + par { + upd4; + upd5; + } + let5; + let6; + upd6; + upd7; + } + } + upd8; + } + } + upd9; + } + } + let7; + while le5.out with cond5 { + seq { + let8; + while le6.out with cond6 { + seq { + upd10; + upd11; + upd12; + } + } + upd13; + } + } + } + } +} +component batch_flatten(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 1, data0_0_0_0_addr1: 1, data0_0_0_0_addr2: 5, data0_0_0_0_addr3: 5, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1) { + cells { + add0 = prim std_add(10); + add1 = prim std_add(5); + add2 = prim std_add(5); + add3 = prim std_add(1); + add4 = prim std_add(1); + const0 = prim std_const(10, 0); + const1 = prim std_const(1, 0); + const10 = prim std_const(5, 1); + const11 = prim std_const(5, 1); + const12 = prim std_const(1, 1); + const13 = prim std_const(1, 1); + const2 = prim std_const(1, 0); + const3 = prim std_const(1, 0); + const4 = prim std_const(1, 0); + const5 = prim std_const(5, 0); + const6 = prim std_const(5, 27); + const7 = prim std_const(5, 0); + const8 = prim std_const(5, 27); + const9 = prim std_const(10, 1); + data_read0_0 = prim std_reg(32); + i0 = prim std_reg(1); + j0 = prim std_reg(1); + k0 = prim std_reg(5); + l0 = prim std_reg(5); + le0 = prim std_le(1); + le1 = prim std_le(1); + le2 = prim std_le(5); + le3 = prim std_le(5); + m_0 = prim std_reg(10); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const2.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + 
le1.right = const4.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const6.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = l0.out; + le3.right = const8.out; + } + group let0<"static"=1> { + m_0.in = const0.out; + m_0.write_en = 1'd1; + let0[done] = m_0.done; + } + group let1<"static"=1> { + i0.in = const1.out; + i0.write_en = 1'd1; + let1[done] = i0.done; + } + group let2<"static"=1> { + j0.in = const3.out; + j0.write_en = 1'd1; + let2[done] = j0.done; + } + group let3<"static"=1> { + k0.in = const5.out; + k0.write_en = 1'd1; + let3[done] = k0.done; + } + group let4<"static"=1> { + l0.in = const7.out; + l0.write_en = 1'd1; + let4[done] = l0.done; + } + group upd0<"static"=1> { + data_read0_0.write_en = 1'd1; + data0_0_0_0_addr3 = l0.out; + data0_0_0_0_addr2 = k0.out; + data0_0_0_0_addr1 = j0.out; + data0_0_0_0_addr0 = i0.out; + data_read0_0.in = 1'd1 ? data0_0_0_0_read_data; + upd0[done] = data_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + x0_0_addr1 = m_0.out; + x0_0_addr0 = i0.out; + x0_0_write_en = 1'd1; + x0_0_write_data = 1'd1 ? data_read0_0.out; + upd1[done] = x0_0_done ? 1'd1; + } + group upd2<"static"=1> { + m_0.write_en = 1'd1; + add0.left = m_0.out; + add0.right = const9.out; + m_0.in = 1'd1 ? add0.out; + upd2[done] = m_0.done ? 1'd1; + } + group upd3<"static"=1> { + l0.write_en = 1'd1; + add1.left = l0.out; + add1.right = const10.out; + l0.in = 1'd1 ? add1.out; + upd3[done] = l0.done ? 1'd1; + } + group upd4<"static"=1> { + k0.write_en = 1'd1; + add2.left = k0.out; + add2.right = const11.out; + k0.in = 1'd1 ? add2.out; + upd4[done] = k0.done ? 1'd1; + } + group upd5<"static"=1> { + j0.write_en = 1'd1; + add3.left = j0.out; + add3.right = const12.out; + j0.in = 1'd1 ? add3.out; + upd5[done] = j0.done ? 1'd1; + } + group upd6<"static"=1> { + i0.write_en = 1'd1; + add4.left = i0.out; + add4.right = const13.out; + i0.in = 1'd1 ? add4.out; + upd6[done] = i0.done ? 
1'd1; + } + } + + control { + seq { + let0; + let1; + while le0.out with cond0 { + seq { + let2; + while le1.out with cond1 { + seq { + let3; + while le2.out with cond2 { + seq { + let4; + while le3.out with cond3 { + seq { + upd0; + upd1; + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + upd6; + } + } + } + } +} + +component main () -> () { + cells { + x8 = prim std_mem_d2(32, 1, 10, 1, 4); + x7 = prim std_mem_d2(32, 1, 10, 1, 4); + fc3_bias = prim std_mem_d1(32, 10, 4); + bias_add2 = bias_add2; + x6 = prim std_mem_d2(32, 1, 64, 1, 7); + fc3_weight = prim std_mem_d2(32, 10, 64, 4, 7); + dense2 = dense2; + x5 = prim std_mem_d2(32, 1, 64, 1, 7); + relu1 = relu1; + x4 = prim std_mem_d2(32, 1, 64, 1, 7); + fc2_bias = prim std_mem_d1(32, 64, 7); + bias_add1 = bias_add1; + x3 = prim std_mem_d2(32, 1, 128, 1, 8); + fc2_weight = prim std_mem_d2(32, 64, 128, 7, 8); + dense1 = dense1; + x2 = prim std_mem_d2(32, 1, 128, 1, 8); + relu0 = relu; + x1 = prim std_mem_d2(32, 1, 128, 1, 8); + fc1_bias = prim std_mem_d1(32, 128, 8); + bias_add0 = bias_add; + x = prim std_mem_d2(32, 1, 784, 1, 10); + fc1_weight = prim std_mem_d2(32, 128, 784, 8, 10); + dense0 = dense; + data = prim std_mem_d4(32, 1, 1, 28, 28, 1, 1, 5, 5); + batch_flatten0 = batch_flatten; + } + wires { + group run_batch_flatten { + data.addr0 = batch_flatten0.data0_0_0_0_addr0; + batch_flatten0.data0_0_0_0_read_data = data.read_data; + data.addr1 = batch_flatten0.data0_0_0_0_addr1; + data.addr2 = batch_flatten0.data0_0_0_0_addr2; + x.addr0 = batch_flatten0.x0_0_addr0; + x.addr1 = batch_flatten0.x0_0_addr1; + x.write_data = batch_flatten0.x0_0_write_data; + x.write_en = batch_flatten0.x0_0_write_en; + batch_flatten0.x0_0_done = x.done; + batch_flatten0.go = 1'd1; + run_batch_flatten[done] = batch_flatten0.done ? 
1'd1; + } + group run_dense { + x.addr0 = dense0.x0_0_addr0; + dense0.x0_0_read_data = x.read_data; + x.addr1 = dense0.x0_0_addr1; + fc1_weight.addr0 = dense0.fc1_weight0_0_addr0; + dense0.fc1_weight0_0_read_data = fc1_weight.read_data; + fc1_weight.addr1 = dense0.fc1_weight0_0_addr1; + x1.addr0 = dense0.x10_0_addr0; + x1.addr1 = dense0.x10_0_addr1; + x1.write_data = dense0.x10_0_write_data; + x1.write_en = dense0.x10_0_write_en; + dense0.x10_0_done = x1.done; + dense0.go = 1'd1; + run_dense[done] = dense0.done ? 1'd1; + } + group run_bias_add { + x1.addr0 = bias_add0.x10_0_addr0; + bias_add0.x10_0_read_data = x1.read_data; + x1.addr1 = bias_add0.x10_0_addr1; + fc1_bias.addr0 = bias_add0.fc1_bias0_addr0; + bias_add0.fc1_bias0_read_data = fc1_bias.read_data; + x2.addr0 = bias_add0.x20_0_addr0; + x2.addr1 = bias_add0.x20_0_addr1; + x2.write_data = bias_add0.x20_0_write_data; + x2.write_en = bias_add0.x20_0_write_en; + bias_add0.x20_0_done = x2.done; + bias_add0.go = 1'd1; + run_bias_add[done] = bias_add0.done ? 1'd1; + } + group run_relu { + x2.addr0 = relu0.x20_0_addr0; + relu0.x20_0_read_data = x2.read_data; + x2.addr1 = relu0.x20_0_addr1; + x3.addr0 = relu0.x30_0_addr0; + x3.addr1 = relu0.x30_0_addr1; + x3.write_data = relu0.x30_0_write_data; + x3.write_en = relu0.x30_0_write_en; + relu0.x30_0_done = x3.done; + relu0.go = 1'd1; + run_relu[done] = relu0.done ? 1'd1; + } + group run_dense1 { + x3.addr0 = dense1.x30_0_addr0; + dense1.x30_0_read_data = x3.read_data; + x3.addr1 = dense1.x30_0_addr1; + fc2_weight.addr0 = dense1.fc2_weight0_0_addr0; + dense1.fc2_weight0_0_read_data = fc2_weight.read_data; + fc2_weight.addr1 = dense1.fc2_weight0_0_addr1; + x4.addr0 = dense1.x40_0_addr0; + x4.addr1 = dense1.x40_0_addr1; + x4.write_data = dense1.x40_0_write_data; + x4.write_en = dense1.x40_0_write_en; + dense1.x40_0_done = x4.done; + dense1.go = 1'd1; + run_dense1[done] = dense1.done ? 
1'd1; + } + group run_bias_add1 { + x4.addr0 = bias_add1.x40_0_addr0; + bias_add1.x40_0_read_data = x4.read_data; + x4.addr1 = bias_add1.x40_0_addr1; + fc2_bias.addr0 = bias_add1.fc2_bias0_addr0; + bias_add1.fc2_bias0_read_data = fc2_bias.read_data; + x5.addr0 = bias_add1.x50_0_addr0; + x5.addr1 = bias_add1.x50_0_addr1; + x5.write_data = bias_add1.x50_0_write_data; + x5.write_en = bias_add1.x50_0_write_en; + bias_add1.x50_0_done = x5.done; + bias_add1.go = 1'd1; + run_bias_add1[done] = bias_add1.done ? 1'd1; + } + group run_relu1 { + x5.addr0 = relu1.x50_0_addr0; + relu1.x50_0_read_data = x5.read_data; + x5.addr1 = relu1.x50_0_addr1; + x6.addr0 = relu1.x60_0_addr0; + x6.addr1 = relu1.x60_0_addr1; + x6.write_data = relu1.x60_0_write_data; + x6.write_en = relu1.x60_0_write_en; + relu1.x60_0_done = x6.done; + relu1.go = 1'd1; + run_relu1[done] = relu1.done ? 1'd1; + } + group run_dense2 { + x6.addr0 = dense2.x60_0_addr0; + dense2.x60_0_read_data = x6.read_data; + x6.addr1 = dense2.x60_0_addr1; + fc3_weight.addr0 = dense2.fc3_weight0_0_addr0; + dense2.fc3_weight0_0_read_data = fc3_weight.read_data; + fc3_weight.addr1 = dense2.fc3_weight0_0_addr1; + x7.addr0 = dense2.x70_0_addr0; + x7.addr1 = dense2.x70_0_addr1; + x7.write_data = dense2.x70_0_write_data; + x7.write_en = dense2.x70_0_write_en; + dense2.x70_0_done = x7.done; + dense2.go = 1'd1; + run_dense2[done] = dense2.done ? 1'd1; + } + group run_bias_add2 { + x7.addr0 = bias_add2.x70_0_addr0; + bias_add2.x70_0_read_data = x7.read_data; + x7.addr1 = bias_add2.x70_0_addr1; + fc3_bias.addr0 = bias_add2.fc3_bias0_addr0; + bias_add2.fc3_bias0_read_data = fc3_bias.read_data; + x8.addr0 = bias_add2.x80_0_addr0; + x8.addr1 = bias_add2.x80_0_addr1; + x8.write_data = bias_add2.x80_0_write_data; + x8.write_en = bias_add2.x80_0_write_en; + bias_add2.x80_0_done = x8.done; + bias_add2.go = 1'd1; + run_bias_add2[done] = bias_add2.done ? 
1'd1; + } + } + control { + seq { + run_batch_flatten; + run_dense; + run_bias_add; + run_relu; + run_dense1; + run_bias_add1; + run_relu1; + run_dense2; + run_bias_add2; + } + } +} diff --git a/frontends/relay-futil/tests/mlp_net.relay b/frontends/relay-futil/tests/mlp_net.relay new file mode 100644 index 0000000000..ef53158f58 --- /dev/null +++ b/frontends/relay-futil/tests/mlp_net.relay @@ -0,0 +1,17 @@ +v0.0.4 +fn (%data: Tensor[(1, 1, 28, 28), int32], %fc1_weight: Tensor[(128, 784), int32], %fc1_bias: Tensor[(128), int32], + %fc2_weight: Tensor[(64, 128), int32], %fc2_bias: Tensor[(64), int32], %fc3_weight: Tensor[(10, 64), int32], + %fc3_bias: Tensor[(10), int32]) -> Tensor[(1, 10), int32] { + let %x: Tensor[(1, 784), int32] = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), int32] */; + let %x1: Tensor[(1, 128), int32] = nn.dense(%x, %fc1_weight, units=128) /* ty=Tensor[(1, 128), int32] */; + let %x2: Tensor[(1, 128), int32] = nn.bias_add(%x1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), int32] */; + let %x3: Tensor[(1, 128), int32] = nn.relu(%x2) /* ty=Tensor[(1, 128), int32] */; + let %x4: Tensor[(1, 64), int32] = nn.dense(%x3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), int32] */; + let %x5: Tensor[(1, 64), int32] = nn.bias_add(%x4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), int32] */; + let %x6: Tensor[(1, 64), int32] = nn.relu(%x5) /* ty=Tensor[(1, 64), int32] */; + let %x7: Tensor[(1, 10), int32] = nn.dense(%x6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), int32] */; + let %x8: Tensor[(1, 10), int32] = nn.bias_add(%x7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), int32] */; + %x8 + // let %x9: Tensor[(1, 10), int32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), int32] */; + // %x9 +} From fe78aee732e236d1c408bd760c007f9a9c51dcb8 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 27 Nov 2020 08:07:29 -0500 Subject: [PATCH 52/75] Add mem_d4 to remove externals pass --- calyx/src/passes/remove_external_memories.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/calyx/src/passes/remove_external_memories.rs b/calyx/src/passes/remove_external_memories.rs index 381f878181..529312900b 100644 --- a/calyx/src/passes/remove_external_memories.rs +++ b/calyx/src/passes/remove_external_memories.rs @@ -14,9 +14,10 @@ impl Default for RemoveExternalMemories<'_> { ("std_mem_d1_ext", "std_mem_d1"), ("std_mem_d2_ext", "std_mem_d2"), ("std_mem_d3_ext", "std_mem_d3"), + ("std_mem_d4_ext", "std_mem_d4") ] - .into_iter() - .collect(); + .into_iter() + .collect(); Self { changeable } } } From e9b3db37c7104f17f114b7243fba1bae8047e6f9 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 27 Nov 2020 08:14:34 -0500 Subject: [PATCH 53/75] Add , --- calyx/src/passes/remove_external_memories.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/calyx/src/passes/remove_external_memories.rs b/calyx/src/passes/remove_external_memories.rs index 529312900b..4c2abdd842 100644 --- a/calyx/src/passes/remove_external_memories.rs +++ b/calyx/src/passes/remove_external_memories.rs @@ -14,10 +14,10 @@ impl Default for RemoveExternalMemories<'_> { ("std_mem_d1_ext", "std_mem_d1"), ("std_mem_d2_ext", "std_mem_d2"), ("std_mem_d3_ext", "std_mem_d3"), - ("std_mem_d4_ext", "std_mem_d4") + ("std_mem_d4_ext", "std_mem_d4"), ] - .into_iter() - .collect(); + .into_iter() + .collect(); Self { changeable } } } From 2ebe1eccb53e43c3be536f915d17b2b68f65d5f8 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 27 Nov 2020 08:45:19 -0500 Subject: [PATCH 54/75] Use op instead of + --- frontends/relay-futil/dahlia_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 084524618b..4839a452ec 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -65,7 +65,7 @@ def broadcast(declaration): for (i = 0...64) { for (j = 0..16) { for (k = 0..32) { - result[i][j][k] := 
op1[i][0][k] + op2[j][0]; + result[i][j][k] := op1[i][0][k] op op2[j][0]; ... """ op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive From 62a20de4a617aa17b72c854c3918f884ed43f83a Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 27 Nov 2020 13:34:18 -0500 Subject: [PATCH 55/75] Add fixed_p_std_gt. --- frontends/relay-futil/dahlia_functions.py | 6 +-- frontends/relay-futil/tests/relu.expect | 57 +++++++++++------------ frontends/relay-futil/tests/relu.relay | 4 +- primitives/std.lib | 18 ++++++- 4 files changed, 49 insertions(+), 36 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 4839a452ec..1eb29a9b53 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -171,10 +171,10 @@ def relu(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" data, res = declaration.inputs[0].primitive, declaration.output.primitive bitwidth, num_dimensions = data.data[0], data.type - assert res.data_type == 'ubit', f'{res.data_type} is not currently supported for ReLU.' 
declarations = pp_dahlia_memory_declarations([data, res]) - let_zero = f'let zero: {data.data_type}<{bitwidth}> = 0;' + zero = '0.0' if data.data_type == 'ufix' else '0' + let_zero = f'let zero: {data.data_type}<{bitwidth}> = {zero};' indices = "" variable_name = CHARACTER_I @@ -184,7 +184,7 @@ def relu(declaration): variable_name = next_character(variable_name) body = f"""if ({data.name}{indices} > zero) {{ {res.name}{indices} := {data.name}{indices}; }} - else {{ {res.name}{indices} := 0; }}""" + else {{ {res.name}{indices} := zero; }}""" program_body = pp_dahlia_loop(data, body) return lower_dahlia_program(f"""{declarations}{NEWL}{let_zero}{NEWL}{program_body}""", declaration.component_name) diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect index 7c2ac6e96a..74b5646d9b 100644 --- a/frontends/relay-futil/tests/relu.expect +++ b/frontends/relay-futil/tests/relu.expect @@ -6,21 +6,20 @@ component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_ add1 = prim std_add(4); add2 = prim std_add(3); add3 = prim std_add(2); - const0 = prim std_const(32, 0); - const1 = prim std_const(2, 0); - const10 = prim std_const(6, 1); - const11 = prim std_const(4, 1); - const12 = prim std_const(3, 1); - const13 = prim std_const(2, 1); - const2 = prim std_const(2, 1); - const3 = prim std_const(3, 0); - const4 = prim std_const(3, 3); - const5 = prim std_const(4, 0); - const6 = prim std_const(4, 7); - const7 = prim std_const(6, 0); - const8 = prim std_const(6, 31); - const9 = prim std_const(32, 0); - gt0 = prim std_gt(32); + const0 = prim std_const(2, 0); + const1 = prim std_const(2, 1); + const10 = prim std_const(3, 1); + const11 = prim std_const(2, 1); + const2 = prim std_const(3, 0); + const3 = prim std_const(3, 3); + const4 = prim std_const(4, 0); + const5 = prim std_const(4, 7); + const6 = prim std_const(6, 0); + const7 = prim std_const(6, 31); + const8 = prim std_const(6, 1); + const9 = prim std_const(4, 1); + 
fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + gt0 = prim fixed_p_std_gt(32, 16, 16); i0 = prim std_reg(2); j0 = prim std_reg(3); k0 = prim std_reg(4); @@ -37,22 +36,22 @@ component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_ group cond0<"static"=0> { cond0[done] = 1'd1; le0.left = i0.out; - le0.right = const2.out; + le0.right = const1.out; } group cond1<"static"=0> { cond1[done] = 1'd1; le1.left = j0.out; - le1.right = const4.out; + le1.right = const3.out; } group cond2<"static"=0> { cond2[done] = 1'd1; le2.left = k0.out; - le2.right = const6.out; + le2.right = const5.out; } group cond3<"static"=0> { cond3[done] = 1'd1; le3.left = l0.out; - le3.right = const8.out; + le3.right = const7.out; } group cond4<"static"=0> { cond4[done] = 1'd1; @@ -60,27 +59,27 @@ component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_ gt0.right = zero_0.out; } group let0<"static"=1> { - zero_0.in = const0.out; + zero_0.in = fpconst0.out; zero_0.write_en = 1'd1; let0[done] = zero_0.done; } group let1<"static"=1> { - i0.in = const1.out; + i0.in = const0.out; i0.write_en = 1'd1; let1[done] = i0.done; } group let2<"static"=1> { - j0.in = const3.out; + j0.in = const2.out; j0.write_en = 1'd1; let2[done] = j0.done; } group let3<"static"=1> { - k0.in = const5.out; + k0.in = const4.out; k0.write_en = 1'd1; let3[done] = k0.done; } group let4<"static"=1> { - l0.in = const7.out; + l0.in = const6.out; l0.write_en = 1'd1; let4[done] = l0.done; } @@ -117,34 +116,34 @@ component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_ x10_0_0_0_addr1 = j0.out; x10_0_0_0_addr0 = i0.out; x10_0_0_0_write_en = 1'd1; - x10_0_0_0_write_data = 1'd1 ? const9.out; + x10_0_0_0_write_data = 1'd1 ? zero_0.out; upd3[done] = x10_0_0_0_done ? 1'd1; } group upd4<"static"=1> { l0.write_en = 1'd1; add0.left = l0.out; - add0.right = const10.out; + add0.right = const8.out; l0.in = 1'd1 ? add0.out; upd4[done] = l0.done ? 
1'd1; } group upd5<"static"=1> { k0.write_en = 1'd1; add1.left = k0.out; - add1.right = const11.out; + add1.right = const9.out; k0.in = 1'd1 ? add1.out; upd5[done] = k0.done ? 1'd1; } group upd6<"static"=1> { j0.write_en = 1'd1; add2.left = j0.out; - add2.right = const12.out; + add2.right = const10.out; j0.in = 1'd1 ? add2.out; upd6[done] = j0.done ? 1'd1; } group upd7<"static"=1> { i0.write_en = 1'd1; add3.left = i0.out; - add3.right = const13.out; + add3.right = const11.out; i0.in = 1'd1 ? add3.out; upd7[done] = i0.done ? 1'd1; } diff --git a/frontends/relay-futil/tests/relu.relay b/frontends/relay-futil/tests/relu.relay index fd5278c4a8..98c9dc8df1 100644 --- a/frontends/relay-futil/tests/relu.relay +++ b/frontends/relay-futil/tests/relu.relay @@ -1,6 +1,6 @@ v0.0.4 -fn (%x: Tensor[(2, 4, 8, 32), int32]) { - let %x1: Tensor[(2, 4, 8, 32), int32] = nn.relu(%x); +fn (%x: Tensor[(2, 4, 8, 32), float32]) { + let %x1: Tensor[(2, 4, 8, 32), float32] = nn.relu(%x); %x1 } diff --git a/primitives/std.lib b/primitives/std.lib index ecd352640a..ad63d71ff8 100644 --- a/primitives/std.lib +++ b/primitives/std.lib @@ -891,14 +891,28 @@ primitive fixed_p_std_div[width, int_width, fract_width](left: width, right: wid logic [2*width-2:0] result; assign result = left / right; - // result bit= 2*width, 1 is placed at fract_width, - //the valid bit would be width amount of bits starting at fract_width + // result bit = 2 * width, 1 is placed at fract_width, + // the valid bit would be width amount of bits starting at fract_width assign out = result[width+fract_width-1:fract_width]; endmodule } } +primitive fixed_p_std_gt<"share"=1>[width, int_width, fract_width](left: width, right: width) -> (out: 1) { + verilog { + module fixed_p_std_gt + #(parameter width = 32, + parameter int_width = 8, + parameter fract_width = 24) + (input logic [width-1:0] left, + input logic [width-1:0] right, + output logic out); + assign out = left > right; + endmodule + } +} + // the bigger integer bit 
always comes left, one with bigger fractional bit comes right primitive fixed_p_std_add_dbit[width, int_width1, fract_width1, int_width2, fract_width2, out_width] (left: width, right: width) ->(out: out_width){ From 4482dd59ca4c77052908aa53118187408985341e Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 27 Nov 2020 13:50:33 -0500 Subject: [PATCH 56/75] Float mlp_net --- frontends/relay-futil/tests/mlp_net.expect | 110 +++++++++++---------- frontends/relay-futil/tests/mlp_net.relay | 26 ++--- 2 files changed, 73 insertions(+), 63 deletions(-) diff --git a/frontends/relay-futil/tests/mlp_net.expect b/frontends/relay-futil/tests/mlp_net.expect index ea30df19af..812a0381fd 100644 --- a/frontends/relay-futil/tests/mlp_net.expect +++ b/frontends/relay-futil/tests/mlp_net.expect @@ -2,7 +2,7 @@ import "primitives/std.lib"; component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x70_0_read_data: 32, x70_0_done: 1, x80_0_read_data: 32, x80_0_done: 1) -> (done: 1, fc3_bias0_addr0: 4, fc3_bias0_write_data: 32, fc3_bias0_write_en: 1, fc3_bias0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1) { cells { - add0 = prim std_add(32); + add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(4); add2 = prim std_add(1); const0 = prim std_const(1, 0); @@ -103,7 +103,7 @@ component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: cells { add0 = prim std_add(7); add1 = prim std_add(4); - add2 = prim std_add(32); + add2 = prim fixed_p_std_add(32, 16, 16); add3 = prim std_add(7); add4 = prim std_add(4); add5 = prim std_add(1); @@ -148,6 +148,8 @@ component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: le6 = prim std_le(4); mult_pipe0 = prim std_mult_pipe(32); product_0 = prim std_reg(32); + slice0 = prim std_slice(32, 32); + slice1 = prim std_slice(32, 32); temporary_x70_0 = prim 
std_mem_d2(32, 1, 10, 1, 4); temporary_x7_read0_0 = prim std_reg(32); transpose_fc3_weight0_0 = prim std_mem_d2(32, 64, 10, 7, 4); @@ -215,18 +217,20 @@ component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: k0.write_en = 1'd1; let4[done] = k0.done; } - group let5<"static"=4> { - bin_read0_0.in = mult_pipe0.out; - bin_read0_0.write_en = mult_pipe0.done; + group let5<"static"=1> { + bin_read0_0.in = slice0.out; + bin_read0_0.write_en = 1'd1; let5[done] = bin_read0_0.done; + slice0.in = mult_pipe0.out; mult_pipe0.left = x6_read0_0.out; mult_pipe0.right = transpose_fc3_weight_read0_0.out; mult_pipe0.go = !mult_pipe0.done ? 1'd1; } group let6<"static"=1> { - product_0.in = bin_read0_0.out; + product_0.in = slice1.out; product_0.write_en = 1'd1; let6[done] = product_0.done; + slice1.in = bin_read0_0.out; } group let7<"static"=1> { i2.in = const15.out; @@ -404,15 +408,14 @@ component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_da cells { add0 = prim std_add(7); add1 = prim std_add(1); - const0 = prim std_const(32, 0); + const0 = prim std_const(1, 0); const1 = prim std_const(1, 0); - const2 = prim std_const(1, 0); - const3 = prim std_const(7, 0); - const4 = prim std_const(7, 63); - const5 = prim std_const(32, 0); - const6 = prim std_const(7, 1); - const7 = prim std_const(1, 1); - gt0 = prim std_gt(32); + const2 = prim std_const(7, 0); + const3 = prim std_const(7, 63); + const4 = prim std_const(7, 1); + const5 = prim std_const(1, 1); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + gt0 = prim fixed_p_std_gt(32, 16, 16); i0 = prim std_reg(1); j0 = prim std_reg(7); le0 = prim std_le(1); @@ -425,12 +428,12 @@ component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_da group cond0<"static"=0> { cond0[done] = 1'd1; le0.left = i0.out; - le0.right = const2.out; + le0.right = const1.out; } group cond1<"static"=0> { cond1[done] = 1'd1; le1.left = j0.out; - le1.right = const4.out; + le1.right = 
const3.out; } group cond2<"static"=0> { cond2[done] = 1'd1; @@ -438,17 +441,17 @@ component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_da gt0.right = zero_0.out; } group let0<"static"=1> { - zero_0.in = const0.out; + zero_0.in = fpconst0.out; zero_0.write_en = 1'd1; let0[done] = zero_0.done; } group let1<"static"=1> { - i0.in = const1.out; + i0.in = const0.out; i0.write_en = 1'd1; let1[done] = i0.done; } group let2<"static"=1> { - j0.in = const3.out; + j0.in = const2.out; j0.write_en = 1'd1; let2[done] = j0.done; } @@ -477,20 +480,20 @@ component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_da x60_0_addr1 = j0.out; x60_0_addr0 = i0.out; x60_0_write_en = 1'd1; - x60_0_write_data = 1'd1 ? const5.out; + x60_0_write_data = 1'd1 ? zero_0.out; upd3[done] = x60_0_done ? 1'd1; } group upd4<"static"=1> { j0.write_en = 1'd1; add0.left = j0.out; - add0.right = const6.out; + add0.right = const4.out; j0.in = 1'd1 ? add0.out; upd4[done] = j0.done ? 1'd1; } group upd5<"static"=1> { i0.write_en = 1'd1; add1.left = i0.out; - add1.right = const7.out; + add1.right = const5.out; i0.in = 1'd1 ? add1.out; upd5[done] = i0.done ? 
1'd1; } @@ -525,7 +528,7 @@ component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_da } component bias_add1(go: 1, clk: 1, fc2_bias0_read_data: 32, fc2_bias0_done: 1, x40_0_read_data: 32, x40_0_done: 1, x50_0_read_data: 32, x50_0_done: 1) -> (done: 1, fc2_bias0_addr0: 7, fc2_bias0_write_data: 32, fc2_bias0_write_en: 1, fc2_bias0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1) { cells { - add0 = prim std_add(32); + add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(7); add2 = prim std_add(1); const0 = prim std_const(1, 0); @@ -626,7 +629,7 @@ component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: cells { add0 = prim std_add(8); add1 = prim std_add(7); - add2 = prim std_add(32); + add2 = prim fixed_p_std_add(32, 16, 16); add3 = prim std_add(8); add4 = prim std_add(7); add5 = prim std_add(1); @@ -671,6 +674,8 @@ component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: le6 = prim std_le(7); mult_pipe0 = prim std_mult_pipe(32); product_0 = prim std_reg(32); + slice0 = prim std_slice(32, 32); + slice1 = prim std_slice(32, 32); temporary_x40_0 = prim std_mem_d2(32, 1, 64, 1, 7); temporary_x4_read0_0 = prim std_reg(32); transpose_fc2_weight0_0 = prim std_mem_d2(32, 128, 64, 8, 7); @@ -738,18 +743,20 @@ component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: k0.write_en = 1'd1; let4[done] = k0.done; } - group let5<"static"=4> { - bin_read0_0.in = mult_pipe0.out; - bin_read0_0.write_en = mult_pipe0.done; + group let5<"static"=1> { + bin_read0_0.in = slice0.out; + bin_read0_0.write_en = 1'd1; let5[done] = bin_read0_0.done; + slice0.in = mult_pipe0.out; mult_pipe0.left = x3_read0_0.out; mult_pipe0.right = transpose_fc2_weight_read0_0.out; mult_pipe0.go = !mult_pipe0.done ? 
1'd1; } group let6<"static"=1> { - product_0.in = bin_read0_0.out; + product_0.in = slice1.out; product_0.write_en = 1'd1; let6[done] = product_0.done; + slice1.in = bin_read0_0.out; } group let7<"static"=1> { i2.in = const15.out; @@ -927,15 +934,14 @@ component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_dat cells { add0 = prim std_add(8); add1 = prim std_add(1); - const0 = prim std_const(32, 0); + const0 = prim std_const(1, 0); const1 = prim std_const(1, 0); - const2 = prim std_const(1, 0); - const3 = prim std_const(8, 0); - const4 = prim std_const(8, 127); - const5 = prim std_const(32, 0); - const6 = prim std_const(8, 1); - const7 = prim std_const(1, 1); - gt0 = prim std_gt(32); + const2 = prim std_const(8, 0); + const3 = prim std_const(8, 127); + const4 = prim std_const(8, 1); + const5 = prim std_const(1, 1); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + gt0 = prim fixed_p_std_gt(32, 16, 16); i0 = prim std_reg(1); j0 = prim std_reg(8); le0 = prim std_le(1); @@ -948,12 +954,12 @@ component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_dat group cond0<"static"=0> { cond0[done] = 1'd1; le0.left = i0.out; - le0.right = const2.out; + le0.right = const1.out; } group cond1<"static"=0> { cond1[done] = 1'd1; le1.left = j0.out; - le1.right = const4.out; + le1.right = const3.out; } group cond2<"static"=0> { cond2[done] = 1'd1; @@ -961,17 +967,17 @@ component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_dat gt0.right = zero_0.out; } group let0<"static"=1> { - zero_0.in = const0.out; + zero_0.in = fpconst0.out; zero_0.write_en = 1'd1; let0[done] = zero_0.done; } group let1<"static"=1> { - i0.in = const1.out; + i0.in = const0.out; i0.write_en = 1'd1; let1[done] = i0.done; } group let2<"static"=1> { - j0.in = const3.out; + j0.in = const2.out; j0.write_en = 1'd1; let2[done] = j0.done; } @@ -1000,20 +1006,20 @@ component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_dat x30_0_addr1 
= j0.out; x30_0_addr0 = i0.out; x30_0_write_en = 1'd1; - x30_0_write_data = 1'd1 ? const5.out; + x30_0_write_data = 1'd1 ? zero_0.out; upd3[done] = x30_0_done ? 1'd1; } group upd4<"static"=1> { j0.write_en = 1'd1; add0.left = j0.out; - add0.right = const6.out; + add0.right = const4.out; j0.in = 1'd1 ? add0.out; upd4[done] = j0.done ? 1'd1; } group upd5<"static"=1> { i0.write_en = 1'd1; add1.left = i0.out; - add1.right = const7.out; + add1.right = const5.out; i0.in = 1'd1 ? add1.out; upd5[done] = i0.done ? 1'd1; } @@ -1048,7 +1054,7 @@ component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_dat } component bias_add(go: 1, clk: 1, fc1_bias0_read_data: 32, fc1_bias0_done: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_read_data: 32, x20_0_done: 1) -> (done: 1, fc1_bias0_addr0: 8, fc1_bias0_write_data: 32, fc1_bias0_write_en: 1, fc1_bias0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1) { cells { - add0 = prim std_add(32); + add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(8); add2 = prim std_add(1); const0 = prim std_const(1, 0); @@ -1149,7 +1155,7 @@ component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: cells { add0 = prim std_add(10); add1 = prim std_add(8); - add2 = prim std_add(32); + add2 = prim fixed_p_std_add(32, 16, 16); add3 = prim std_add(10); add4 = prim std_add(8); add5 = prim std_add(1); @@ -1194,6 +1200,8 @@ component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: le6 = prim std_le(8); mult_pipe0 = prim std_mult_pipe(32); product_0 = prim std_reg(32); + slice0 = prim std_slice(32, 32); + slice1 = prim std_slice(32, 32); temporary_x10_0 = prim std_mem_d2(32, 1, 128, 1, 8); temporary_x1_read0_0 = prim std_reg(32); transpose_fc1_weight0_0 = prim std_mem_d2(32, 784, 128, 10, 8); @@ -1261,18 +1269,20 @@ component dense(go: 1, clk: 1, 
fc1_weight0_0_read_data: 32, fc1_weight0_0_done: k0.write_en = 1'd1; let4[done] = k0.done; } - group let5<"static"=4> { - bin_read0_0.in = mult_pipe0.out; - bin_read0_0.write_en = mult_pipe0.done; + group let5<"static"=1> { + bin_read0_0.in = slice0.out; + bin_read0_0.write_en = 1'd1; let5[done] = bin_read0_0.done; + slice0.in = mult_pipe0.out; mult_pipe0.left = x_read0_0.out; mult_pipe0.right = transpose_fc1_weight_read0_0.out; mult_pipe0.go = !mult_pipe0.done ? 1'd1; } group let6<"static"=1> { - product_0.in = bin_read0_0.out; + product_0.in = slice1.out; product_0.write_en = 1'd1; let6[done] = product_0.done; + slice1.in = bin_read0_0.out; } group let7<"static"=1> { i2.in = const15.out; diff --git a/frontends/relay-futil/tests/mlp_net.relay b/frontends/relay-futil/tests/mlp_net.relay index ef53158f58..8943360100 100644 --- a/frontends/relay-futil/tests/mlp_net.relay +++ b/frontends/relay-futil/tests/mlp_net.relay @@ -1,17 +1,17 @@ v0.0.4 -fn (%data: Tensor[(1, 1, 28, 28), int32], %fc1_weight: Tensor[(128, 784), int32], %fc1_bias: Tensor[(128), int32], - %fc2_weight: Tensor[(64, 128), int32], %fc2_bias: Tensor[(64), int32], %fc3_weight: Tensor[(10, 64), int32], - %fc3_bias: Tensor[(10), int32]) -> Tensor[(1, 10), int32] { - let %x: Tensor[(1, 784), int32] = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), int32] */; - let %x1: Tensor[(1, 128), int32] = nn.dense(%x, %fc1_weight, units=128) /* ty=Tensor[(1, 128), int32] */; - let %x2: Tensor[(1, 128), int32] = nn.bias_add(%x1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), int32] */; - let %x3: Tensor[(1, 128), int32] = nn.relu(%x2) /* ty=Tensor[(1, 128), int32] */; - let %x4: Tensor[(1, 64), int32] = nn.dense(%x3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), int32] */; - let %x5: Tensor[(1, 64), int32] = nn.bias_add(%x4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), int32] */; - let %x6: Tensor[(1, 64), int32] = nn.relu(%x5) /* ty=Tensor[(1, 64), int32] */; - let %x7: Tensor[(1, 10), int32] = nn.dense(%x6, %fc3_weight, 
units=10) /* ty=Tensor[(1, 10), int32] */; - let %x8: Tensor[(1, 10), int32] = nn.bias_add(%x7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), int32] */; +fn (%data: Tensor[(1, 1, 28, 28), float32], %fc1_weight: Tensor[(128, 784), float32], %fc1_bias: Tensor[(128), float32], + %fc2_weight: Tensor[(64, 128), float32], %fc2_bias: Tensor[(64), float32], %fc3_weight: Tensor[(10, 64), float32], + %fc3_bias: Tensor[(10), float32]) -> Tensor[(1, 10), float32] { + let %x: Tensor[(1, 784), float32] = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), float32] */; + let %x1: Tensor[(1, 128), float32] = nn.dense(%x, %fc1_weight, units=128) /* ty=Tensor[(1, 128), float32] */; + let %x2: Tensor[(1, 128), float32] = nn.bias_add(%x1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), float32] */; + let %x3: Tensor[(1, 128), float32] = nn.relu(%x2) /* ty=Tensor[(1, 128), float32] */; + let %x4: Tensor[(1, 64), float32] = nn.dense(%x3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), float32] */; + let %x5: Tensor[(1, 64), float32] = nn.bias_add(%x4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), float32] */; + let %x6: Tensor[(1, 64), float32] = nn.relu(%x5) /* ty=Tensor[(1, 64), float32] */; + let %x7: Tensor[(1, 10), float32] = nn.dense(%x6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), float32] */; + let %x8: Tensor[(1, 10), float32] = nn.bias_add(%x7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), float32] */; %x8 - // let %x9: Tensor[(1, 10), int32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), int32] */; + // let %x9: Tensor[(1, 10), float32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), float32] */; // %x9 } From 822f1f32beb1c571c3dc60cbb6e5c7ccbcababb9 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 27 Nov 2020 14:13:57 -0500 Subject: [PATCH 57/75] Add element-wise sqrt. 
--- frontends/relay-futil/compiler.py | 3 +- frontends/relay-futil/dahlia_functions.py | 20 +++ frontends/relay-futil/tests/sqrt.expect | 180 ++++++++++++++++++++++ frontends/relay-futil/tests/sqrt.relay | 6 + 4 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 frontends/relay-futil/tests/sqrt.expect create mode 100644 frontends/relay-futil/tests/sqrt.relay diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 75dc565df9..86fd5bd3e3 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -14,7 +14,8 @@ # Mapping from Relay function names to their respective Dahlia lowering. RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, - 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims} + 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims, + 'sqrt': sqrt} # Mapping between primitive type and associated Dahlia name extension. # E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 1eb29a9b53..106c000205 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -207,6 +207,26 @@ def negative(declaration): return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) +# TODO(cgyurgyik): Similar to ReLU, this requires signed operands. 
+def sqrt(declaration): + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" + op, res = declaration.inputs[0].primitive, declaration.output.primitive + bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type + include_sqrt = f"""import "fxp_sqrt.h" {{ def sqrt(value: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" + + indices = "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + indices += f'[{variable_name}]' + variable_name = next_character(variable_name) + + declarations = pp_dahlia_memory_declarations([op, res]) + program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := sqrt({op.name}{indices});""") + return lower_dahlia_program(f"""{include_sqrt}{NEWL}{declarations}{NEWL}{program_body}""", + declaration.component_name) + + def expand_dims(declaration): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" axis, num_newaxis = declaration.attributes.get_int("axis"), declaration.attributes.get_int("num_newaxis") diff --git a/frontends/relay-futil/tests/sqrt.expect b/frontends/relay-futil/tests/sqrt.expect new file mode 100644 index 0000000000..edb40c6259 --- /dev/null +++ b/frontends/relay-futil/tests/sqrt.expect @@ -0,0 +1,180 @@ +import "primitives/std.lib"; + +component sqrt(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 4, x0_0_0_0_addr1: 4, x0_0_0_0_addr2: 5, x0_0_0_0_addr3: 7, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 4, x10_0_0_0_addr1: 4, x10_0_0_0_addr2: 5, x10_0_0_0_addr3: 7, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { + cells { + add0 = prim std_add(7); + add1 = prim std_add(5); + add2 = prim std_add(4); + add3 = prim std_add(4); + const0 = prim std_const(4, 0); + const1 = prim std_const(4, 7); + const10 = prim std_const(4, 1); + const11 = prim std_const(4, 1); + 
const2 = prim std_const(4, 0); + const3 = prim std_const(4, 7); + const4 = prim std_const(5, 0); + const5 = prim std_const(5, 15); + const6 = prim std_const(7, 0); + const7 = prim std_const(7, 63); + const8 = prim std_const(7, 1); + const9 = prim std_const(5, 1); + i0 = prim std_reg(4); + j0 = prim std_reg(4); + k0 = prim std_reg(5); + l0 = prim std_reg(7); + le0 = prim std_le(4); + le1 = prim std_le(4); + le2 = prim std_le(5); + le3 = prim std_le(7); + sqrt0 = prim std_sqrt(); + x_read0_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = l0.out; + le3.right = const7.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let2<"static"=1> { + k0.in = const4.out; + k0.write_en = 1'd1; + let2[done] = k0.done; + } + group let3<"static"=1> { + l0.in = const6.out; + l0.write_en = 1'd1; + let3[done] = l0.done; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_0_0_addr3 = l0.out; + x0_0_0_0_addr2 = k0.out; + x0_0_0_0_addr1 = j0.out; + x0_0_0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=18> { + x10_0_0_0_addr3 = l0.out; + x10_0_0_0_addr2 = k0.out; + x10_0_0_0_addr1 = j0.out; + x10_0_0_0_addr0 = i0.out; + x10_0_0_0_write_en = sqrt0.done; + sqrt0.in = x_read0_0.out; + sqrt0.go = !sqrt0.done ? 1'd1; + x10_0_0_0_write_data = sqrt0.done ? sqrt0.out; + upd1[done] = x10_0_0_0_done ? 
1'd1; + } + group upd2<"static"=1> { + l0.write_en = 1'd1; + add0.left = l0.out; + add0.right = const8.out; + l0.in = 1'd1 ? add0.out; + upd2[done] = l0.done ? 1'd1; + } + group upd3<"static"=1> { + k0.write_en = 1'd1; + add1.left = k0.out; + add1.right = const9.out; + k0.in = 1'd1 ? add1.out; + upd3[done] = k0.done ? 1'd1; + } + group upd4<"static"=1> { + j0.write_en = 1'd1; + add2.left = j0.out; + add2.right = const10.out; + j0.in = 1'd1 ? add2.out; + upd4[done] = j0.done ? 1'd1; + } + group upd5<"static"=1> { + i0.write_en = 1'd1; + add3.left = i0.out; + add3.right = const11.out; + i0.in = 1'd1 ? add3.out; + upd5[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + upd0; + upd1; + upd2; + } + } + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d4(32, 8, 8, 16, 64, 4, 4, 5, 7); + x = prim std_mem_d4(32, 8, 8, 16, 64, 4, 4, 5, 7); + sqrt0 = sqrt; + } + wires { + group run_sqrt { + x.addr0 = sqrt0.x0_0_0_0_addr0; + sqrt0.x0_0_0_0_read_data = x.read_data; + x.addr1 = sqrt0.x0_0_0_0_addr1; + x.addr2 = sqrt0.x0_0_0_0_addr2; + x1.addr0 = sqrt0.x10_0_0_0_addr0; + x1.write_data = sqrt0.x10_0_0_0_write_data; + x1.write_en = sqrt0.x10_0_0_0_write_en; + sqrt0.x10_0_0_0_done = x1.done; + sqrt0.go = 1'd1; + run_sqrt[done] = sqrt0.done ? 
1'd1; + } + } + control { + seq { + run_sqrt; + } + } +} diff --git a/frontends/relay-futil/tests/sqrt.relay b/frontends/relay-futil/tests/sqrt.relay new file mode 100644 index 0000000000..abb0faaae8 --- /dev/null +++ b/frontends/relay-futil/tests/sqrt.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(8, 8, 16, 64), int32]) { + let %x1 = sqrt(%x); + %x1 +} + From 87dfc0365439bcda16848e50e492e4548f56e370 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 13:37:50 -0500 Subject: [PATCH 58/75] Initial. --- frontends/relay-futil/compiler.py | 14 +- frontends/relay-futil/dahlia_functions.py | 88 +++- frontends/relay-futil/example.py | 14 +- .../relay-futil/tests/data/max_pool2d.expect | 162 ++++++++ .../relay-futil/tests/data/max_pool2d.relay | 6 + .../tests/data/max_pool2d.relay.data | 20 + .../relay-futil/tests/data/softmax.expect | 14 + .../relay-futil/tests/data/softmax.relay | 6 + .../relay-futil/tests/data/softmax.relay.data | 10 + .../tests/data/tensor4d_multiply.expect | 0 .../tests/data/tensor4d_multiply.relay | 5 + .../tests/data/tensor4d_multiply.relay.data | 23 ++ frontends/relay-futil/tests/max_pool2d.expect | 379 ++++++++++++++++++ frontends/relay-futil/tests/max_pool2d.relay | 6 + frontends/relay-futil/tests/mlp_net.expect | 179 +++++++++ frontends/relay-futil/tests/mlp_net.relay | 5 +- frontends/relay-futil/tests/softmax.expect | 193 +++++++++ frontends/relay-futil/tests/softmax.relay | 6 + frontends/relay-futil/utilities.py | 9 +- 19 files changed, 1116 insertions(+), 23 deletions(-) create mode 100644 frontends/relay-futil/tests/data/max_pool2d.expect create mode 100644 frontends/relay-futil/tests/data/max_pool2d.relay create mode 100644 frontends/relay-futil/tests/data/max_pool2d.relay.data create mode 100644 frontends/relay-futil/tests/data/softmax.expect create mode 100644 frontends/relay-futil/tests/data/softmax.relay create mode 100644 frontends/relay-futil/tests/data/softmax.relay.data create mode 100644 
frontends/relay-futil/tests/data/tensor4d_multiply.expect create mode 100644 frontends/relay-futil/tests/data/tensor4d_multiply.relay create mode 100644 frontends/relay-futil/tests/data/tensor4d_multiply.relay.data create mode 100644 frontends/relay-futil/tests/max_pool2d.expect create mode 100644 frontends/relay-futil/tests/max_pool2d.relay create mode 100644 frontends/relay-futil/tests/softmax.expect create mode 100644 frontends/relay-futil/tests/softmax.relay diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 86fd5bd3e3..5212354512 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -14,8 +14,8 @@ # Mapping from Relay function names to their respective Dahlia lowering. RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, - 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims, - 'sqrt': sqrt} + 'nn.bias_add': bias_add, 'nn.relu': relu, 'nn.softmax': softmax, 'nn.max_pool2d': max_pool2d, + 'negative': negative, 'expand_dims': expand_dims, 'sqrt': sqrt} # Mapping between primitive type and associated Dahlia name extension. # E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. @@ -58,10 +58,10 @@ def relay_id(self, name): def dahlia_name(self, name, type): """ - Dahlia uses the following naming scheme for an arbitrary variable 'X': - Memory1D: 'X0', 'X1', 'X2', ... - Memory2D: 'X0_0', 'X1_0', 'X2_0', ... - Memory3D: 'X0_0_0', 'X1_0_0', 'X2_0_0', ... + Dahlia uses the following naming scheme for arbitrary variables `X`, `Y`: + Memory1D: `X0`, `Y0`, ... + Memory2D: `X0_0`, `Y0_0`, ... + Memory3D: `X0_0_0`, `Y0_0_0`, ... """ assert type in DahliaNameExtension, f'{name} with {type} is not supported yet.' 
return name + DahliaNameExtension[type] @@ -80,7 +80,7 @@ def get_dahlia_declaration(self, function_name, cells, args, attrs): function = RelayFunctionCalls[function_name] name = function.__name__ else: - assert False, f'{function_name} with type {input_type} is not supported.' + assert False, f'{function_name} is not supported for lowering to FuTIL.' return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), op=op, inputs=args, attributes=attrs, function=function) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 106c000205..e266904fb7 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -52,7 +52,7 @@ def broadcast(declaration): """ https://numpy.org/doc/stable/user/basics.broadcasting.html Implements array broadcasting: - Two dimensions are compatible when either (1) they're equal, or (2) one of them is 1. + Two dimensions are compatible when either (1) they're equal, or (2) one of them is `1`. It is not required that both operands have the same number of dimensions either. - When lowering from Relay IR, we are guaranteed the arrays are compatible for broadcasting. - Variable names for indexing through the array begin with `i`, and continue alphabetically. 
@@ -88,11 +88,9 @@ def broadcast(declaration): res_indices.append(current_dimension) if op1_dims > op2_dims and len(op2_sizes) <= i: op1_indices.append(current_dimension) - continue - if op2_dims > op1_dims and len(op1_sizes) <= i: + elif op2_dims > op1_dims and len(op1_sizes) <= i: op2_indices.append(current_dimension) - continue - if op1_sizes[i] == op2_sizes[i]: + elif op1_sizes[i] == op2_sizes[i]: op1_indices.append(current_dimension) op2_indices.append(current_dimension) elif op1_sizes[i] > op2_sizes[i]: @@ -170,11 +168,11 @@ def bias_add(declaration): def relu(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" data, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = data.data[0], data.type + bitwidth, num_dimensions, data_type = data.data[0], data.type, data.data_type declarations = pp_dahlia_memory_declarations([data, res]) - zero = '0.0' if data.data_type == 'ufix' else '0' - let_zero = f'let zero: {data.data_type}<{bitwidth}> = {zero};' + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + let_zero = f'let zero: {data_type}<{bitwidth}> = {zero};' indices = "" variable_name = CHARACTER_I @@ -301,7 +299,7 @@ def batch_matmul(declaration): # TODO(cgyurgyik): Similar to batch_matmul, this requires a temporary memory to store the output # of the matrix multiply. Otherwise, the values aren't computed properly. Look deeper into this. def dense(declaration): - """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.dense""" # TODO(cgyurgyik): Add support for `units`. 
units = declaration.attributes.get_int("units") op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive @@ -335,3 +333,75 @@ def dense(declaration): }} """ return lower_dahlia_program(program, declaration.component_name) + + +# TODO(cgyurgyik): Currently, only supports a small subset (namely those used in our VGG net and MLP net examples). +def softmax(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.softmax""" + op, res = declaration.inputs[0].primitive, declaration.output.primitive + axis = declaration.attributes.get_int("axis") + data_type = op.data_type + assert op.type == PrimitiveType.Memory2D, f'nn.softmax with pritmive type Memory{op.type}D is not supported.' + assert axis == -1 or axis == 1, f'nn.softmax with axis = {axis} is not supported.' + bitwidth, size0, size1, index_size0, index_size1 = op.data[0], op.data[1], op.data[2], op.data[3], op.data[4] + + import_exp = f"""import "std_exp.h" {{ def exp(x: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" + declarations = pp_dahlia_memory_declarations([res, op]) + + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + body = f""" + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + let {op.name}_expsum: {data_type}<{bitwidth}> = {zero}; + for (let j: ubit<{index_size1}> = 0..{size1}) {{ {op.name}_expsum += exp({op.name}[i][j]); }} + for (let k: ubit<{index_size1}> = 0..{size1}) {{ + {res.name}[i][k] := exp({op.name}[i][k]); + --- + {res.name}[i][k] := {res.name}[i][k] / {op.name}_expsum; + }} + }} + """ + program = f"""{import_exp}{NEWL}{declarations}{body}""" + return lower_dahlia_program(program, declaration.component_name) + + +def max_pool2d(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.max_pool2d""" + data, res = declaration.inputs[0].primitive, declaration.output.primitive + + strides = declaration.attributes.get_int_tuple("strides") + pool_size = 
declaration.attributes.get_int_tuple("pool_size") + padding = declaration.attributes.get_int_tuple("padding") + layout = declaration.attributes.get_str("layout") + ceil_mode = declaration.attributes.get_int("ceil_mode") + for p in padding: assert p == 0, f"Non-zero padding: {padding} is not currently supported for nn.max_pool2d" + assert layout == 'NCHW', f"Layout \'{layout}\' is not currently supported for nn.max_pool2d; please use `NCHW`" + assert ceil_mode == False, "`ceil_mode` is not currently supported for nn.max_pool2d" + bitwidth, data_type = data.data[0], data.data_type + size0, size1, size2, size3 = res.data[1], res.data[2], res.data[3], res.data[4] + + declarations = pp_dahlia_memory_declarations([res, data]) + program_body = f""" + for (let i: ubit<32> = 0..{size0}) {{ + for (let j: ubit<32> = 0..{size1}) {{ + for (let k: ubit<32> = 0..{size2}) {{ + for (let l: ubit<32> = 0..{size3}) {{ + let stride_x: ubit<32> = k * {strides[0]}/*stride[0]*/; + let stride_y: ubit<32> = l * {strides[1]}/*stride[1]*/; + + let max: {data_type}<{bitwidth}> = {data.name}[i][j][stride_x][stride_y]; + for (let m: ubit<32> = 0..{pool_size[0]}/*pool_size[0]*/) {{ + for (let n: ubit<32> = 0..{pool_size[1]}/*pool_size[1]*/) {{ + let pool_x: ubit<32> = stride_x + m; + let pool_y: ubit<32> = stride_y + n; + let current: {data_type}<{bitwidth}> = {data.name}[i][j][pool_x][pool_y]; + if (current > max) {{ max := current; }} else {{ max := max; }} + }} + }} + {res.name}[i][j][k][l] := max; + }} + }} + }} + }} + """ + program = f"""{declarations}{NEWL}{program_body}""" + return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 0a986e9453..69db563726 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -49,6 +49,15 @@ def dense(): return relay.Function([x, y], relay.nn.dense(x, y, units=10)) +def softmax(): + x = relay.var('x', shape=[1, 10], dtype='float32') 
+ return relay.Function([x], relay.nn.softmax(x)) + + +def max_pool2d(): + data = relay.var('data', shape=[2, 2, 4, 4], dtype='int32') + return relay.Function([data], relay.nn.max_pool2d(data, padding=[0,0,0,0], strides=[2,2], pool_size=[2,2])) + def mlp_net(): """The MLP test from Relay.""" from tvm.relay.testing import mlp @@ -58,11 +67,12 @@ def mlp_net(): def vgg_net(): """The VGG test from Relay.""" from tvm.relay.testing import vgg - return vgg.get_net(batch_size=1, image_shape=(3, 224, 224), num_classes=10, dtype='int32', num_layers=11, + return vgg.get_net(batch_size=5, image_shape=(3, 224, 224), num_classes=10, dtype='int32', num_layers=13, batch_norm=True) -ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, dense, mlp_net, vgg_net] +ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, dense, softmax, mlp_net, + vgg_net, max_pool2d] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/tests/data/max_pool2d.expect b/frontends/relay-futil/tests/data/max_pool2d.expect new file mode 100644 index 0000000000..a1c95c7c88 --- /dev/null +++ b/frontends/relay-futil/tests/data/max_pool2d.expect @@ -0,0 +1,162 @@ +{ + "data": [ + [ + [ + [ + 12, + 30, + 34, + 37 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 8, + 2, + 112, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 20, + 0, + 70, + 25 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 12, + 0, + 100, + 12 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ] + ], + "result": [ + [ + [ + [ + 30, + 37 + ], + [ + 2, + 4 + ] + ], + [ + [ + 8, + 112 + ], + [ + 2, + 4 + ] + ] + ], + [ + [ + [ + 20, + 70 + ], + [ + 2, + 4 + ] + ], + [ + [ + 12, + 100 + ], + [ + 2, + 4 + ] + 
] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/max_pool2d.relay b/frontends/relay-futil/tests/data/max_pool2d.relay new file mode 100644 index 0000000000..e1ba79d351 --- /dev/null +++ b/frontends/relay-futil/tests/data/max_pool2d.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%data: Tensor[(2, 2, 4, 4), int32]) { + let %result: Tensor[(2, 2, 2, 2), int32] = nn.max_pool2d(%data, pool_size=[2, 2], strides=[2, 2], padding=[0,0,0,0]); + %result +} + diff --git a/frontends/relay-futil/tests/data/max_pool2d.relay.data b/frontends/relay-futil/tests/data/max_pool2d.relay.data new file mode 100644 index 0000000000..9e19e8a053 --- /dev/null +++ b/frontends/relay-futil/tests/data/max_pool2d.relay.data @@ -0,0 +1,20 @@ +{ + "data": { + "data": [ + [[[12,30,34,37], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[8,2,112,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], + [[[20,0,70,25], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[12,0,100,12], [1,2,3,4], [1,2,3,4], [1,2,3,4]]] + ], + "bitwidth": 32 + }, + "max": { + "data": [0], + "bitwidth": 32 + }, + "result": { + "data": [ + [[[0,0], [0,0]], [[0,0], [0,0]]], + [[[0,0], [0,0]], [[0,0], [0,0]]] + ], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/softmax.expect b/frontends/relay-futil/tests/data/softmax.expect new file mode 100644 index 0000000000..1073dc7c6c --- /dev/null +++ b/frontends/relay-futil/tests/data/softmax.expect @@ -0,0 +1,14 @@ +{ + "x": [ + [ + 4, + 16 + ] + ], + "x1": [ + [ + 0, + 0 + ] + ] +} diff --git a/frontends/relay-futil/tests/data/softmax.relay b/frontends/relay-futil/tests/data/softmax.relay new file mode 100644 index 0000000000..858ae52126 --- /dev/null +++ b/frontends/relay-futil/tests/data/softmax.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(1, 2), float32]) { + let %x1: Tensor[(1, 2), float32] = nn.softmax(%x); + %x1 +} + diff --git a/frontends/relay-futil/tests/data/softmax.relay.data b/frontends/relay-futil/tests/data/softmax.relay.data new file mode 100644 index 
0000000000..f0d81e4e55 --- /dev/null +++ b/frontends/relay-futil/tests/data/softmax.relay.data @@ -0,0 +1,10 @@ +{ + "x": { + "data": [[4, 16]], + "bitwidth": 32 + }, + "x1": { + "data": [[0, 0]], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.expect b/frontends/relay-futil/tests/data/tensor4d_multiply.expect new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.relay b/frontends/relay-futil/tests/data/tensor4d_multiply.relay new file mode 100644 index 0000000000..197d3c9564 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor4d_multiply.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(2, 2, 4, 4), int32], %x1: Tensor[(2, 2, 4, 4), int32]) { + let %x2: Tensor[(2, 2, 4, 4), int32] = multiply(%x, %x1); + %x2 +} diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.relay.data b/frontends/relay-futil/tests/data/tensor4d_multiply.relay.data new file mode 100644 index 0000000000..6cdaa8c7a7 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor4d_multiply.relay.data @@ -0,0 +1,23 @@ +{ + "x": { + "data": [ + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]] + ], + "bitwidth": 32 + }, + "x1": { + "data": [ + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]] + ], + "bitwidth": 32 + }, + "x2": { + "data": [ + [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]], [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]], + [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]], [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]] + ], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/max_pool2d.expect 
b/frontends/relay-futil/tests/max_pool2d.expect new file mode 100644 index 0000000000..cbd0da89b2 --- /dev/null +++ b/frontends/relay-futil/tests/max_pool2d.expect @@ -0,0 +1,379 @@ +import "primitives/std.lib"; + +component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, result0_0_0_0_read_data: 32, result0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 2, data0_0_0_0_addr1: 2, data0_0_0_0_addr2: 3, data0_0_0_0_addr3: 3, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, result0_0_0_0_addr0: 2, result0_0_0_0_addr1: 2, result0_0_0_0_addr2: 2, result0_0_0_0_addr3: 2, result0_0_0_0_write_data: 32, result0_0_0_0_write_en: 1, result0_0_0_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(32); + add2 = prim std_add(32); + add3 = prim std_add(32); + add4 = prim std_add(32); + add5 = prim std_add(32); + add6 = prim std_add(32); + add7 = prim std_add(32); + bin_read0_0 = prim std_reg(32); + bin_read1_0 = prim std_reg(32); + const0 = prim std_const(32, 0); + const1 = prim std_const(32, 1); + const10 = prim std_const(32, 0); + const11 = prim std_const(32, 1); + const12 = prim std_const(32, 0); + const13 = prim std_const(32, 1); + const14 = prim std_const(32, 1); + const15 = prim std_const(32, 1); + const16 = prim std_const(32, 1); + const17 = prim std_const(32, 1); + const18 = prim std_const(32, 1); + const19 = prim std_const(32, 1); + const2 = prim std_const(32, 0); + const3 = prim std_const(32, 1); + const4 = prim std_const(32, 0); + const5 = prim std_const(32, 1); + const6 = prim std_const(32, 0); + const7 = prim std_const(32, 1); + const8 = prim std_const(32, 2); + const9 = prim std_const(32, 2); + data_read0_0 = prim std_reg(32); + data_read1_0 = prim std_reg(32); + gt0 = prim std_gt(32); + i0 = prim std_reg(32); + j0 = prim std_reg(32); + k0 = prim std_reg(32); + l0 = prim std_reg(32); + le0 = prim std_le(32); + le1 = prim std_le(32); + le2 = prim std_le(32); + le3 = prim std_le(32); + le4 = prim 
std_le(32); + le5 = prim std_le(32); + m0 = prim std_reg(32); + max_0 = prim std_reg(32); + mult_pipe0 = prim std_mult_pipe(32); + mult_pipe1 = prim std_mult_pipe(32); + n0 = prim std_reg(32); + pool_x_0 = prim std_reg(32); + pool_y_0 = prim std_reg(32); + slice0 = prim std_slice(32, 2); + slice1 = prim std_slice(32, 2); + slice10 = prim std_slice(32, 3); + slice11 = prim std_slice(32, 3); + slice12 = prim std_slice(32, 2); + slice13 = prim std_slice(32, 2); + slice14 = prim std_slice(32, 2); + slice15 = prim std_slice(32, 2); + slice2 = prim std_slice(32, 3); + slice3 = prim std_slice(32, 3); + slice4 = prim std_slice(32, 2); + slice5 = prim std_slice(32, 2); + slice6 = prim std_slice(32, 3); + slice7 = prim std_slice(32, 3); + slice8 = prim std_slice(32, 2); + slice9 = prim std_slice(32, 2); + stride_k_0 = prim std_reg(32); + stride_l_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = l0.out; + le3.right = const7.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = m0.out; + le4.right = const11.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = n0.out; + le5.right = const13.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + gt0.left = data_read0_0.out; + gt0.right = max_0.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let1[done] = j0.done; + } + group let10<"static"=1> { + pool_x_0.in = add0.out; + pool_x_0.write_en = 1'd1; + let10[done] = pool_x_0.done; + add0.left = stride_k_0.out; + add0.right = m0.out; + } + group let11<"static"=1> { 
+ pool_y_0.in = add1.out; + pool_y_0.write_en = 1'd1; + let11[done] = pool_y_0.done; + add1.left = stride_l_0.out; + add1.right = n0.out; + } + group let2<"static"=1> { + k0.in = const4.out; + k0.write_en = 1'd1; + let2[done] = k0.done; + } + group let3<"static"=1> { + l0.in = const6.out; + l0.write_en = 1'd1; + let3[done] = l0.done; + } + group let4<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let4[done] = bin_read0_0.done; + mult_pipe0.left = k0.out; + mult_pipe0.right = const8.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let5<"static"=1> { + stride_k_0.in = bin_read0_0.out; + stride_k_0.write_en = 1'd1; + let5[done] = stride_k_0.done; + } + group let6<"static"=4> { + bin_read1_0.in = mult_pipe1.out; + bin_read1_0.write_en = mult_pipe1.done; + let6[done] = bin_read1_0.done; + mult_pipe1.left = l0.out; + mult_pipe1.right = const9.out; + mult_pipe1.go = !mult_pipe1.done ? 1'd1; + } + group let7<"static"=1> { + stride_l_0.in = bin_read1_0.out; + stride_l_0.write_en = 1'd1; + let7[done] = stride_l_0.done; + } + group let8<"static"=1> { + m0.in = const10.out; + m0.write_en = 1'd1; + let8[done] = m0.done; + } + group let9<"static"=1> { + n0.in = const12.out; + n0.write_en = 1'd1; + let9[done] = n0.done; + } + group upd0<"static"=1> { + max_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice3.out; + slice3.in = stride_l_0.out; + data0_0_0_0_addr2 = slice2.out; + slice2.in = stride_k_0.out; + data0_0_0_0_addr1 = slice1.out; + slice1.in = j0.out; + data0_0_0_0_addr0 = slice0.out; + slice0.in = i0.out; + max_0.in = 1'd1 ? data0_0_0_0_read_data; + upd0[done] = max_0.done ? 1'd1; + } + group upd1<"static"=1> { + data_read0_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice7.out; + slice7.in = pool_y_0.out; + data0_0_0_0_addr2 = slice6.out; + slice6.in = pool_x_0.out; + data0_0_0_0_addr1 = slice5.out; + slice5.in = j0.out; + data0_0_0_0_addr0 = slice4.out; + slice4.in = i0.out; + data_read0_0.in = 1'd1 ? 
data0_0_0_0_read_data; + upd1[done] = data_read0_0.done ? 1'd1; + } + group upd10<"static"=1> { + i0.write_en = 1'd1; + add7.left = i0.out; + add7.right = const19.out; + i0.in = 1'd1 ? add7.out; + upd10[done] = i0.done ? 1'd1; + } + group upd2<"static"=1> { + data_read1_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice11.out; + slice11.in = pool_y_0.out; + data0_0_0_0_addr2 = slice10.out; + slice10.in = pool_x_0.out; + data0_0_0_0_addr1 = slice9.out; + slice9.in = j0.out; + data0_0_0_0_addr0 = slice8.out; + slice8.in = i0.out; + data_read1_0.in = 1'd1 ? data0_0_0_0_read_data; + upd2[done] = data_read1_0.done ? 1'd1; + } + group upd3<"static"=1> { + max_0.write_en = 1'd1; + max_0.in = 1'd1 ? data_read1_0.out; + upd3[done] = max_0.done ? 1'd1; + } + group upd4<"static"=1> { + n0.write_en = 1'd1; + add2.left = n0.out; + add2.right = const14.out; + n0.in = 1'd1 ? add2.out; + upd4[done] = n0.done ? 1'd1; + } + group upd5<"static"=1> { + m0.write_en = 1'd1; + add3.left = m0.out; + add3.right = const15.out; + m0.in = 1'd1 ? add3.out; + upd5[done] = m0.done ? 1'd1; + } + group upd6<"static"=1> { + result0_0_0_0_addr3 = slice15.out; + slice15.in = l0.out; + result0_0_0_0_addr2 = slice14.out; + slice14.in = k0.out; + result0_0_0_0_addr1 = slice13.out; + slice13.in = j0.out; + result0_0_0_0_addr0 = slice12.out; + slice12.in = i0.out; + result0_0_0_0_write_en = 1'd1; + result0_0_0_0_write_data = 1'd1 ? max_0.out; + upd6[done] = result0_0_0_0_done ? 1'd1; + } + group upd7<"static"=1> { + l0.write_en = 1'd1; + add4.left = l0.out; + add4.right = const16.out; + l0.in = 1'd1 ? add4.out; + upd7[done] = l0.done ? 1'd1; + } + group upd8<"static"=1> { + k0.write_en = 1'd1; + add5.left = k0.out; + add5.right = const17.out; + k0.in = 1'd1 ? add5.out; + upd8[done] = k0.done ? 1'd1; + } + group upd9<"static"=1> { + j0.write_en = 1'd1; + add6.left = j0.out; + add6.right = const18.out; + j0.in = 1'd1 ? add6.out; + upd9[done] = j0.done ? 
1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + par { + seq { + let4; + let5; + } + seq { + let6; + let7; + } + } + upd0; + par { + seq { + let8; + while le4.out with cond4 { + seq { + let9; + while le5.out with cond5 { + seq { + par { + let10; + let11; + } + upd1; + if gt0.out with cond6 { + seq { + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + upd6; + } + upd7; + } + } + upd8; + } + } + upd9; + } + } + upd10; + } + } + } + } +} + +component main () -> () { + cells { + result = prim std_mem_d4(32, 2, 2, 2, 2, 2, 2, 2, 2); + data = prim std_mem_d4(32, 2, 2, 4, 4, 2, 2, 3, 3); + max_pool2d0 = max_pool2d; + } + wires { + group run_max_pool2d { + data.addr0 = max_pool2d0.data0_0_0_0_addr0; + max_pool2d0.data0_0_0_0_read_data = data.read_data; + data.addr1 = max_pool2d0.data0_0_0_0_addr1; + data.addr2 = max_pool2d0.data0_0_0_0_addr2; + data.addr3 = max_pool2d0.data0_0_0_0_addr3; + result.addr0 = max_pool2d0.result0_0_0_0_addr0; + result.addr1 = max_pool2d0.result0_0_0_0_addr1; + result.addr2 = max_pool2d0.result0_0_0_0_addr2; + result.addr3 = max_pool2d0.result0_0_0_0_addr3; + result.write_data = max_pool2d0.result0_0_0_0_write_data; + result.write_en = max_pool2d0.result0_0_0_0_write_en; + max_pool2d0.result0_0_0_0_done = result.done; + max_pool2d0.go = 1'd1; + run_max_pool2d[done] = max_pool2d0.done ? 
1'd1; + } + } + control { + seq { + run_max_pool2d; + } + } +} diff --git a/frontends/relay-futil/tests/max_pool2d.relay b/frontends/relay-futil/tests/max_pool2d.relay new file mode 100644 index 0000000000..e1ba79d351 --- /dev/null +++ b/frontends/relay-futil/tests/max_pool2d.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%data: Tensor[(2, 2, 4, 4), int32]) { + let %result: Tensor[(2, 2, 2, 2), int32] = nn.max_pool2d(%data, pool_size=[2, 2], strides=[2, 2], padding=[0,0,0,0]); + %result +} + diff --git a/frontends/relay-futil/tests/mlp_net.expect b/frontends/relay-futil/tests/mlp_net.expect index 812a0381fd..b780037235 100644 --- a/frontends/relay-futil/tests/mlp_net.expect +++ b/frontends/relay-futil/tests/mlp_net.expect @@ -1,5 +1,169 @@ import "primitives/std.lib"; +component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_data: 32, x90_0_done: 1) -> (done: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1, x90_0_addr0: 1, x90_0_addr1: 4, x90_0_write_data: 32, x90_0_write_en: 1, x90_0_clk: 1) { + cells { + add0 = prim fixed_p_std_add(32, 16, 16); + add1 = prim std_add(4); + add2 = prim std_add(4); + add3 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(4, 0); + const3 = prim std_const(4, 9); + const4 = prim std_const(4, 1); + const5 = prim std_const(4, 0); + const6 = prim std_const(4, 9); + const7 = prim std_const(4, 1); + const8 = prim std_const(1, 1); + div_pipe0 = prim std_div_pipe(32); + exp0 = prim std_exp(); + exp1 = prim std_exp(); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + i0 = prim std_reg(1); + j0 = prim std_reg(4); + k0 = prim std_reg(4); + le0 = prim std_le(1); + le1 = prim std_le(4); + le2 = prim std_le(4); + slice0 = prim std_slice(32, 32); + x8_expsum_0 = prim std_reg(32); + x8_read0_0 = prim std_reg(32); + x8_read1_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + 
cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const6.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + x8_expsum_0.in = fpconst0.out; + x8_expsum_0.write_en = 1'd1; + let1[done] = x8_expsum_0.done; + } + group let2<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let2[done] = j0.done; + } + group let3<"static"=1> { + k0.in = const5.out; + k0.write_en = 1'd1; + let3[done] = k0.done; + } + group let4<"static"=1> { + bin_read0_0.in = slice0.out; + bin_read0_0.write_en = 1'd1; + let4[done] = bin_read0_0.done; + slice0.in = div_pipe0.out; + div_pipe0.left = exp1.out; + div_pipe0.right = x8_expsum_0.out; + div_pipe0.go = !div_pipe0.done ? 1'd1; + exp1.exponent = x8_read1_0.out; + exp1.go = !exp1.done ? 1'd1; + } + group upd0<"static"=1> { + x8_read0_0.write_en = 1'd1; + x80_0_addr1 = j0.out; + x80_0_addr0 = i0.out; + x8_read0_0.in = 1'd1 ? x80_0_read_data; + upd0[done] = x8_read0_0.done ? 1'd1; + } + group upd1<"static"=2> { + x8_expsum_0.write_en = 1'd1; + add0.left = x8_expsum_0.out; + add0.right = exp0.out; + exp0.exponent = x8_read0_0.out; + exp0.go = !exp0.done ? 1'd1; + x8_expsum_0.in = 1'd1 ? add0.out; + upd1[done] = x8_expsum_0.done ? 1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + x8_read1_0.write_en = 1'd1; + x80_0_addr1 = k0.out; + x80_0_addr0 = i0.out; + x8_read1_0.in = 1'd1 ? x80_0_read_data; + upd3[done] = x8_read1_0.done ? 1'd1; + } + group upd4<"static"=1> { + x90_0_addr1 = k0.out; + x90_0_addr0 = i0.out; + x90_0_write_en = 1'd1; + x90_0_write_data = 1'd1 ? bin_read0_0.out; + upd4[done] = x90_0_done ? 
1'd1; + } + group upd5<"static"=1> { + k0.write_en = 1'd1; + add2.left = k0.out; + add2.right = const7.out; + k0.in = 1'd1 ? add2.out; + upd5[done] = k0.done ? 1'd1; + } + group upd6<"static"=1> { + i0.write_en = 1'd1; + add3.left = i0.out; + add3.right = const8.out; + i0.in = 1'd1 ? add3.out; + upd6[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + par { + let1; + seq { + let2; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + } + } + let3; + while le2.out with cond2 { + seq { + upd3; + let4; + upd4; + upd5; + } + } + upd6; + } + } + } + } +} component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x70_0_read_data: 32, x70_0_done: 1, x80_0_read_data: 32, x80_0_done: 1) -> (done: 1, fc3_bias0_addr0: 4, fc3_bias0_write_data: 32, fc3_bias0_write_en: 1, fc3_bias0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); @@ -1623,7 +1787,9 @@ component batch_flatten(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_do component main () -> () { cells { + x9 = prim std_mem_d2(32, 1, 10, 1, 4); x8 = prim std_mem_d2(32, 1, 10, 1, 4); + softmax0 = softmax; x7 = prim std_mem_d2(32, 1, 10, 1, 4); fc3_bias = prim std_mem_d1(32, 10, 4); bias_add2 = bias_add2; @@ -1774,6 +1940,18 @@ component main () -> () { bias_add2.go = 1'd1; run_bias_add2[done] = bias_add2.done ? 1'd1; } + group run_softmax { + x8.addr0 = softmax0.x80_0_addr0; + softmax0.x80_0_read_data = x8.read_data; + x8.addr1 = softmax0.x80_0_addr1; + x9.addr0 = softmax0.x90_0_addr0; + x9.addr1 = softmax0.x90_0_addr1; + x9.write_data = softmax0.x90_0_write_data; + x9.write_en = softmax0.x90_0_write_en; + softmax0.x90_0_done = x9.done; + softmax0.go = 1'd1; + run_softmax[done] = softmax0.done ? 
1'd1; + } } control { seq { @@ -1786,6 +1964,7 @@ component main () -> () { run_relu1; run_dense2; run_bias_add2; + run_softmax; } } } diff --git a/frontends/relay-futil/tests/mlp_net.relay b/frontends/relay-futil/tests/mlp_net.relay index 8943360100..4368b51016 100644 --- a/frontends/relay-futil/tests/mlp_net.relay +++ b/frontends/relay-futil/tests/mlp_net.relay @@ -11,7 +11,6 @@ fn (%data: Tensor[(1, 1, 28, 28), float32], %fc1_weight: Tensor[(128, 784), floa let %x6: Tensor[(1, 64), float32] = nn.relu(%x5) /* ty=Tensor[(1, 64), float32] */; let %x7: Tensor[(1, 10), float32] = nn.dense(%x6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), float32] */; let %x8: Tensor[(1, 10), float32] = nn.bias_add(%x7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), float32] */; - %x8 - // let %x9: Tensor[(1, 10), float32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), float32] */; - // %x9 + let %x9: Tensor[(1, 10), float32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), float32] */; + %x9 } diff --git a/frontends/relay-futil/tests/softmax.expect b/frontends/relay-futil/tests/softmax.expect new file mode 100644 index 0000000000..8fc76f5b1c --- /dev/null +++ b/frontends/relay-futil/tests/softmax.expect @@ -0,0 +1,193 @@ +import "primitives/std.lib"; + +component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 4, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { + cells { + add0 = prim fixed_p_std_add(32, 16, 16); + add1 = prim std_add(4); + add2 = prim std_add(4); + add3 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(4, 0); + const3 = prim std_const(4, 9); + const4 = prim std_const(4, 1); + const5 = prim std_const(4, 0); + const6 = prim std_const(4, 9); + const7 = prim std_const(4, 1); + const8 = prim std_const(1, 1); + div_pipe0 = 
prim std_div_pipe(32); + exp0 = prim std_exp(); + exp1 = prim std_exp(); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + i0 = prim std_reg(1); + j0 = prim std_reg(4); + k0 = prim std_reg(4); + le0 = prim std_le(1); + le1 = prim std_le(4); + le2 = prim std_le(4); + slice0 = prim std_slice(32, 32); + x_expsum_0 = prim std_reg(32); + x_read0_0 = prim std_reg(32); + x_read1_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const6.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + x_expsum_0.in = fpconst0.out; + x_expsum_0.write_en = 1'd1; + let1[done] = x_expsum_0.done; + } + group let2<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let2[done] = j0.done; + } + group let3<"static"=1> { + k0.in = const5.out; + k0.write_en = 1'd1; + let3[done] = k0.done; + } + group let4<"static"=1> { + bin_read0_0.in = slice0.out; + bin_read0_0.write_en = 1'd1; + let4[done] = bin_read0_0.done; + slice0.in = div_pipe0.out; + div_pipe0.left = exp1.out; + div_pipe0.right = x_expsum_0.out; + div_pipe0.go = !div_pipe0.done ? 1'd1; + exp1.exponent = x_read1_0.out; + exp1.go = !exp1.done ? 1'd1; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = j0.out; + x0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1<"static"=2> { + x_expsum_0.write_en = 1'd1; + add0.left = x_expsum_0.out; + add0.right = exp0.out; + exp0.exponent = x_read0_0.out; + exp0.go = !exp0.done ? 1'd1; + x_expsum_0.in = 1'd1 ? add0.out; + upd1[done] = x_expsum_0.done ? 
1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + x_read1_0.write_en = 1'd1; + x0_0_addr1 = k0.out; + x0_0_addr0 = i0.out; + x_read1_0.in = 1'd1 ? x0_0_read_data; + upd3[done] = x_read1_0.done ? 1'd1; + } + group upd4<"static"=1> { + x10_0_addr1 = k0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? bin_read0_0.out; + upd4[done] = x10_0_done ? 1'd1; + } + group upd5<"static"=1> { + k0.write_en = 1'd1; + add2.left = k0.out; + add2.right = const7.out; + k0.in = 1'd1 ? add2.out; + upd5[done] = k0.done ? 1'd1; + } + group upd6<"static"=1> { + i0.write_en = 1'd1; + add3.left = i0.out; + add3.right = const8.out; + i0.in = 1'd1 ? add3.out; + upd6[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + par { + let1; + seq { + let2; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + } + } + let3; + while le2.out with cond2 { + seq { + upd3; + let4; + upd4; + upd5; + } + } + upd6; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 1, 10, 1, 4); + x = prim std_mem_d2(32, 1, 10, 1, 4); + softmax0 = softmax; + } + wires { + group run_softmax { + x.addr0 = softmax0.x0_0_addr0; + softmax0.x0_0_read_data = x.read_data; + x.addr1 = softmax0.x0_0_addr1; + x1.addr0 = softmax0.x10_0_addr0; + x1.addr1 = softmax0.x10_0_addr1; + x1.write_data = softmax0.x10_0_write_data; + x1.write_en = softmax0.x10_0_write_en; + softmax0.x10_0_done = x1.done; + softmax0.go = 1'd1; + run_softmax[done] = softmax0.done ? 
1'd1; + } + } + control { + seq { + run_softmax; + } + } +} diff --git a/frontends/relay-futil/tests/softmax.relay b/frontends/relay-futil/tests/softmax.relay new file mode 100644 index 0000000000..df46a20d70 --- /dev/null +++ b/frontends/relay-futil/tests/softmax.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(1, 10), float32]) { + let %x1: Tensor[(1, 10), float32] = nn.softmax(%x); + %x1 +} + diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index aab5fd2a72..fc171f60e9 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -96,13 +96,18 @@ def build_main_controls(c: FComponent): wires.append(FWire(f'{prim.name}.addr1', f'{declaration.decl_name}.{input.dahlia_name}_addr1')) if prim.type == PrimitiveType.Memory2D: continue wires.append(FWire(f'{prim.name}.addr2', f'{declaration.decl_name}.{input.dahlia_name}_addr2')) + if prim.type == PrimitiveType.Memory3D: continue + wires.append(FWire(f'{prim.name}.addr3', f'{declaration.decl_name}.{input.dahlia_name}_addr3')) output = declaration.output + type = output.primitive.type wires.append(FWire(f'{output.primitive.name}.addr0', f'{declaration.decl_name}.{output.dahlia_name}_addr0')) - if output.primitive.type == PrimitiveType.Memory2D or output.primitive.type == PrimitiveType.Memory3D: + if type == PrimitiveType.Memory2D or type == PrimitiveType.Memory3D or type == PrimitiveType.Memory4D: wires.append(FWire(f'{output.primitive.name}.addr1', f'{declaration.decl_name}.{output.dahlia_name}_addr1')) - if output.primitive.type == PrimitiveType.Memory3D: + if type == PrimitiveType.Memory3D or type == PrimitiveType.Memory4D: wires.append(FWire(f'{output.primitive.name}.addr2', f'{declaration.decl_name}.{output.dahlia_name}_addr2')) + if type == PrimitiveType.Memory4D: + wires.append(FWire(f'{output.primitive.name}.addr3', f'{declaration.decl_name}.{output.dahlia_name}_addr3')) wires.append( FWire(f'{output.primitive.name}.write_data', 
f'{declaration.decl_name}.{output.dahlia_name}_write_data')) From 4935912db0aa25f17cb8b6b78a20032ee965d46f Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 13:40:22 -0500 Subject: [PATCH 59/75] Add stdlib --- primitives/std.lib | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/primitives/std.lib b/primitives/std.lib index ad63d71ff8..61e65d7e66 100644 --- a/primitives/std.lib +++ b/primitives/std.lib @@ -63,7 +63,7 @@ primitive std_mem_d2[width, d0_size, d1_size, d0_idx_size, d1_idx_size]( parameter d1_idx_size = 4) (input logic [d0_idx_size-1:0] addr0, input logic [d1_idx_size-1:0] addr1, - input logic [width-1:0] write_data, + input logic [width-1:0] write_data /*verilator public*/, input logic write_en, input logic clk, output logic [width-1:0] read_data, @@ -697,6 +697,28 @@ primitive std_le<"share"=1>[width](left: width, right: width) -> (out: 1) { } } +primitive std_exp(exponent: 32, go: 1, clk: 1) -> (out: 32, done: 1) { + verilog { + module std_exp + (input logic [31:0] exponent, + input logic go, + input logic clk, + output logic [31:0] out, + output logic done); + always_ff @(posedge clk) begin + if (go) begin + /* verilator lint_off REALCVT */ + out = 2.718281 ** exponent; + done = 1; + end else begin + out = 0; + done = 0; + end + end + endmodule + } +} + primitive std_sqrt(in: 32, go: 1, clk: 1) -> (out: 32, done: 1) { verilog { module std_sqrt @@ -810,11 +832,12 @@ primitive fixed_p_std_const[width, int_width, fract_width, value1, value2] () -> module fixed_p_std_const #(parameter width=32, parameter int_width = 8, - parameter fract_width= 24, + parameter fract_width = 24, parameter value1 = 0, parameter value2 = 0) (output logic [width-1:0] out); + /* verilator lint_off WIDTHCONCAT */ assign out = {value1, value2}; endmodule } @@ -966,4 +989,4 @@ primitive fixed_p_std_add_dbit[width, int_width1, fract_width1, int_width2, frac assign out = {whole_int, whole_fract}; endmodule } -} +} \ No 
newline at end of file From 1bcfefe4b7997bb53b5eeb3a4ec66bbdc762c4c0 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 17:08:44 -0500 Subject: [PATCH 60/75] Softmax for integer type fixed. --- frontends/relay-futil/dahlia_functions.py | 4 +-- frontends/relay-futil/tests/softmax.expect | 41 +++++++++++++++------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index e266904fb7..56ed424b51 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -393,8 +393,8 @@ def max_pool2d(declaration): for (let n: ubit<32> = 0..{pool_size[1]}/*pool_size[1]*/) {{ let pool_x: ubit<32> = stride_x + m; let pool_y: ubit<32> = stride_y + n; - let current: {data_type}<{bitwidth}> = {data.name}[i][j][pool_x][pool_y]; - if (current > max) {{ max := current; }} else {{ max := max; }} + let current: {data_type}<{bitwidth}> = {data.name}[i][j][pool_x][pool_y]; + if (current > max) {{ max := current; }} else {{ max := max; }} }} }} {res.name}[i][j][k][l] := max; diff --git a/frontends/relay-futil/tests/softmax.expect b/frontends/relay-futil/tests/softmax.expect index 8fc76f5b1c..92246625a7 100644 --- a/frontends/relay-futil/tests/softmax.expect +++ b/frontends/relay-futil/tests/softmax.expect @@ -27,6 +27,7 @@ component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_da le1 = prim std_le(4); le2 = prim std_le(4); slice0 = prim std_slice(32, 32); + x1_read0_0 = prim std_reg(32); x_expsum_0 = prim std_reg(32); x_read0_0 = prim std_reg(32); x_read1_0 = prim std_reg(32); @@ -72,11 +73,9 @@ component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_da bin_read0_0.write_en = 1'd1; let4[done] = bin_read0_0.done; slice0.in = div_pipe0.out; - div_pipe0.left = exp1.out; + div_pipe0.left = x1_read0_0.out; div_pipe0.right = x_expsum_0.out; div_pipe0.go = !div_pipe0.done ? 
1'd1; - exp1.exponent = x_read1_0.out; - exp1.go = !exp1.done ? 1'd1; } group upd0<"static"=1> { x_read0_0.write_en = 1'd1; @@ -85,7 +84,7 @@ component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_da x_read0_0.in = 1'd1 ? x0_0_read_data; upd0[done] = x_read0_0.done ? 1'd1; } - group upd1<"static"=2> { + group upd1 { x_expsum_0.write_en = 1'd1; add0.left = x_expsum_0.out; add0.right = exp0.out; @@ -108,26 +107,42 @@ component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_da x_read1_0.in = 1'd1 ? x0_0_read_data; upd3[done] = x_read1_0.done ? 1'd1; } - group upd4<"static"=1> { + group upd4 { x10_0_addr1 = k0.out; x10_0_addr0 = i0.out; - x10_0_write_en = 1'd1; - x10_0_write_data = 1'd1 ? bin_read0_0.out; + x10_0_write_en = exp1.done; + exp1.exponent = x_read1_0.out; + exp1.go = !exp1.done ? 1'd1; + x10_0_write_data = exp1.done ? exp1.out; upd4[done] = x10_0_done ? 1'd1; } group upd5<"static"=1> { + x1_read0_0.write_en = 1'd1; + x10_0_addr1 = k0.out; + x10_0_addr0 = i0.out; + x1_read0_0.in = 1'd1 ? x10_0_read_data; + upd5[done] = x1_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + x10_0_addr1 = k0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? bin_read0_0.out; + upd6[done] = x10_0_done ? 1'd1; + } + group upd7<"static"=1> { k0.write_en = 1'd1; add2.left = k0.out; add2.right = const7.out; k0.in = 1'd1 ? add2.out; - upd5[done] = k0.done ? 1'd1; + upd7[done] = k0.done ? 1'd1; } - group upd6<"static"=1> { + group upd8<"static"=1> { i0.write_en = 1'd1; add3.left = i0.out; add3.right = const8.out; i0.in = 1'd1 ? add3.out; - upd6[done] = i0.done ? 1'd1; + upd8[done] = i0.done ? 
1'd1; } } @@ -153,12 +168,14 @@ component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_da while le2.out with cond2 { seq { upd3; - let4; upd4; upd5; + let4; + upd6; + upd7; } } - upd6; + upd8; } } } From 78d7168afbdfd1b5901f5b9490e7ce6a3d387f54 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 17:09:48 -0500 Subject: [PATCH 61/75] Remove unnecessary else. --- frontends/relay-futil/dahlia_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 56ed424b51..cfb482d3f5 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -394,7 +394,7 @@ def max_pool2d(declaration): let pool_x: ubit<32> = stride_x + m; let pool_y: ubit<32> = stride_y + n; let current: {data_type}<{bitwidth}> = {data.name}[i][j][pool_x][pool_y]; - if (current > max) {{ max := current; }} else {{ max := max; }} + if (current > max) {{ max := current; }} }} }} {res.name}[i][j][k][l] := max; From 75c149948e6d44ab70068b10b6680ce20efe1c44 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 20:18:06 -0500 Subject: [PATCH 62/75] Add working max_pool2d. 
--- frontends/relay-futil/dahlia_functions.py | 22 +-- .../relay-futil/tests/data/max_pool2d.expect | 140 +++++++++--------- .../tests/data/max_pool2d.relay.data | 43 ++++-- 3 files changed, 112 insertions(+), 93 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index cfb482d3f5..aab69765e7 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -381,23 +381,23 @@ def max_pool2d(declaration): declarations = pp_dahlia_memory_declarations([res, data]) program_body = f""" - for (let i: ubit<32> = 0..{size0}) {{ - for (let j: ubit<32> = 0..{size1}) {{ - for (let k: ubit<32> = 0..{size2}) {{ - for (let l: ubit<32> = 0..{size3}) {{ - let stride_x: ubit<32> = k * {strides[0]}/*stride[0]*/; - let stride_y: ubit<32> = l * {strides[1]}/*stride[1]*/; + for (let b: ubit<32> = 0..{size0}) {{ + for (let c: ubit<32> = 0..{size1}) {{ + for (let y: ubit<32> = 0..{size2}) {{ + for (let x: ubit<32> = 0..{size3}) {{ + let stride_y: ubit<32> = y * {strides[1]}/*strides[1]*/; + let stride_x: ubit<32> = x * {strides[0]}/*strides[0]*/; - let max: {data_type}<{bitwidth}> = {data.name}[i][j][stride_x][stride_y]; + let max: {data_type}<{bitwidth}> = {data.name}[b][c][stride_y][stride_x]; for (let m: ubit<32> = 0..{pool_size[0]}/*pool_size[0]*/) {{ for (let n: ubit<32> = 0..{pool_size[1]}/*pool_size[1]*/) {{ - let pool_x: ubit<32> = stride_x + m; - let pool_y: ubit<32> = stride_y + n; - let current: {data_type}<{bitwidth}> = {data.name}[i][j][pool_x][pool_y]; + let pool_y: ubit<32> = stride_y + m; + let pool_x: ubit<32> = stride_x + n; + let current: {data_type}<{bitwidth}> = {data.name}[b][c][pool_y][pool_x]; if (current > max) {{ max := current; }} }} }} - {res.name}[i][j][k][l] := max; + {res.name}[b][c][y][x] := max; }} }} }} diff --git a/frontends/relay-futil/tests/data/max_pool2d.expect b/frontends/relay-futil/tests/data/max_pool2d.expect index a1c95c7c88..2e4f5739ae 100644 
--- a/frontends/relay-futil/tests/data/max_pool2d.expect +++ b/frontends/relay-futil/tests/data/max_pool2d.expect @@ -3,112 +3,108 @@ [ [ [ - 12, - 30, - 34, - 37 + 10, + 20, + 100, + 101 ], [ - 1, - 2, - 3, - 4 + 30, + 40, + 102, + 103 ], [ - 1, - 2, - 3, - 4 + 20, + 30, + 100, + 103 ], [ - 1, - 2, - 3, - 4 + 10, + 40, + 103, + 100 ] - ] - ], - [ + ], [ [ - 8, - 2, - 112, - 4 + 20, + 0, + 70, + 25 ], [ 1, 2, - 3, + 13, 4 ], [ 1, 2, - 3, - 4 + 5, + 6 ], [ - 1, - 2, 3, - 4 + 4, + 7, + 8 ] ] ], [ [ [ - 20, - 0, - 70, - 25 + 11, + 21, + 109, + 10 ], [ - 1, - 2, - 3, - 4 + 31, + 41, + 0, + 14 ], [ + 19, + 42, 1, - 2, - 3, - 4 + 103 ], [ 1, - 2, - 3, - 4 + 18, + 10, + 101 ] - ] - ], - [ + ], [ - [ - 12, - 0, - 100, - 12 - ], [ 1, 2, + 4, + 3 + ], + [ 3, - 4 + 4, + 2, + 1 ], [ - 1, + 4, + 2, 2, - 3, 4 ], [ 1, - 2, 3, - 4 + 3, + 1 ] ] ] @@ -117,43 +113,43 @@ [ [ [ - 30, - 37 + 40, + 103 ], [ - 2, - 4 + 40, + 103 ] ], [ [ - 8, - 112 + 20, + 70 ], [ - 2, - 4 + 4, + 8 ] ] ], [ [ [ - 20, - 70 + 41, + 109 ], [ - 2, - 4 + 42, + 103 ] ], [ [ - 12, - 100 + 4, + 4 ], [ - 2, + 4, 4 ] ] diff --git a/frontends/relay-futil/tests/data/max_pool2d.relay.data b/frontends/relay-futil/tests/data/max_pool2d.relay.data index 9e19e8a053..517b34c9cc 100644 --- a/frontends/relay-futil/tests/data/max_pool2d.relay.data +++ b/frontends/relay-futil/tests/data/max_pool2d.relay.data @@ -1,20 +1,43 @@ { "data": { "data": [ - [[[12,30,34,37], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[8,2,112,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], - [[[20,0,70,25], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[12,0,100,12], [1,2,3,4], [1,2,3,4], [1,2,3,4]]] + [ + [ + [10,20, 100,101], + [30,40, 102,103], + + [20,30, 100,103], + [10,40, 103,100] + ], + [ + [20,0, 70,25], + [1, 2, 13,4], + + [1,2, 5,6], + [3,4, 7,8] + ] + ], + [ + [ + [11,21, 109,10], + [31,41, 0,14], + + [19,42, 1,103], + [1,18, 10,101] + ], + [ + [1,2, 4,3], + [3,4, 2,1], + + [4,2, 2,4], + [1,3, 3,1] + ] + ] ], "bitwidth": 32 }, - "max": { - "data": [0], - 
"bitwidth": 32 - }, "result": { - "data": [ - [[[0,0], [0,0]], [[0,0], [0,0]]], - [[[0,0], [0,0]], [[0,0], [0,0]]] - ], + "data": [ [[[0,0], [0,0]], [[0,0], [0,0]]], [[[0,0], [0,0]], [[0,0], [0,0]]] ], "bitwidth": 32 } } \ No newline at end of file From d63fd81487946f03630867cf12170f830f3103e2 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 20:29:54 -0500 Subject: [PATCH 63/75] Add expected output. --- .../tests/data/tensor4d_multiply.expect | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.expect b/frontends/relay-futil/tests/data/tensor4d_multiply.expect index e69de29bb2..bd548739d2 100644 --- a/frontends/relay-futil/tests/data/tensor4d_multiply.expect +++ b/frontends/relay-futil/tests/data/tensor4d_multiply.expect @@ -0,0 +1,344 @@ +{ + "x": [ + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ] + ], + "x1": [ + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ] + ], + "x2": [ + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 
1, + 4, + 9, + 16 + ] + ] + ], + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ], + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ], + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ] + ] +} From 82363ad00d08abeb5f22b559e550b6e6ba8a1493 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Wed, 2 Dec 2020 20:35:52 -0500 Subject: [PATCH 64/75] Remove ellipsis --- frontends/relay-futil/compiler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 5212354512..b432882e04 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -59,9 +59,9 @@ def relay_id(self, name): def dahlia_name(self, name, type): """ Dahlia uses the following naming scheme for arbitrary variables `X`, `Y`: - Memory1D: `X0`, `Y0`, ... - Memory2D: `X0_0`, `Y0_0`, ... - Memory3D: `X0_0_0`, `Y0_0_0`, ... + Memory1D: `X0`, `Y0` + Memory2D: `X0_0`, `Y0_0` + Memory3D: `X0_0_0`, `Y0_0_0` """ assert type in DahliaNameExtension, f'{name} with {type} is not supported yet.' return name + DahliaNameExtension[type] From 83a0cc5357c55132b92d06dc2d4aae194d1561b7 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 3 Dec 2020 18:25:32 -0500 Subject: [PATCH 65/75] Cleanup, add externalize registry to fud. 
--- frontends/relay-futil/compiler.py | 4 +- frontends/relay-futil/dahlia_functions.py | 2 +- frontends/relay-futil/example.py | 7 +- frontends/relay-futil/tests/bias_add.expect | 4 + frontends/relay-futil/tests/let2.expect | 1 + frontends/relay-futil/tests/let3.expect | 4 +- frontends/relay-futil/tests/max_pool2d.expect | 243 ++++++++---------- frontends/relay-futil/tests/mlp_net.expect | 51 +++- frontends/relay-futil/tests/relu.expect | 4 + frontends/relay-futil/tests/sqrt.expect | 4 + .../relay-futil/tests/tensor3d_divide.expect | 2 +- fud/fud/main.py | 4 + 12 files changed, 176 insertions(+), 154 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index b432882e04..a6cd71c690 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -147,10 +147,10 @@ def lower_to_futil(program) -> str: program = relay_transforms(program) visitor = Relay2Futil() - PREAMBLE = """import "primitives/std.lib";""" + PREAMBLE = """import "primitives/std.lib";\n""" MAIN = visitor.visit(program) DAHLIA_COMPONENTS = '\n'.join(visitor.dahlia_components) - NEWL = '\n\n' + NEWL = '\n' return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}' diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index aab69765e7..14dc98ddd4 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -42,7 +42,7 @@ def lower_dahlia_program(prog, component_name): fuse_binary = os.environ['DAHLIA_EXEC'] if 'DAHLIA_EXEC' in os.environ else 'fuse' command = f""" {fuse_binary} {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {NO_ERR} \ - && cargo run -- {tf1.name} -l ../../ -p externalize > {tf2.name} {NO_ERR}""" + && fud e --from futil {tf1.name} --to futil-externalize > {tf2.name} {NO_ERR}""" subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() component = tf2.read().decode()[len(IMPORT_STATEMENT):] # Skip over 
importing the primitives library. return component diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 69db563726..ca25a9bebd 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -56,7 +56,8 @@ def softmax(): def max_pool2d(): data = relay.var('data', shape=[2, 2, 4, 4], dtype='int32') - return relay.Function([data], relay.nn.max_pool2d(data, padding=[0,0,0,0], strides=[2,2], pool_size=[2,2])) + return relay.Function([data], relay.nn.max_pool2d(data, padding=[0, 0, 0, 0], strides=[2, 2], pool_size=[2, 2])) + def mlp_net(): """The MLP test from Relay.""" @@ -71,8 +72,8 @@ def vgg_net(): batch_norm=True) -ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, dense, softmax, mlp_net, - vgg_net, max_pool2d] +ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, + bias_add, relu, dense, softmax, mlp_net, vgg_net, max_pool2d] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect index 620da35d44..c181b95bbf 100644 --- a/frontends/relay-futil/tests/bias_add.expect +++ b/frontends/relay-futil/tests/bias_add.expect @@ -175,9 +175,13 @@ component main () -> () { bias_add0.x0_0_0_0_read_data = x.read_data; x.addr1 = bias_add0.x0_0_0_0_addr1; x.addr2 = bias_add0.x0_0_0_0_addr2; + x.addr3 = bias_add0.x0_0_0_0_addr3; bias.addr0 = bias_add0.bias0_addr0; bias_add0.bias0_read_data = bias.read_data; x1.addr0 = bias_add0.x10_0_0_0_addr0; + x1.addr1 = bias_add0.x10_0_0_0_addr1; + x1.addr2 = bias_add0.x10_0_0_0_addr2; + x1.addr3 = bias_add0.x10_0_0_0_addr3; x1.write_data = bias_add0.x10_0_0_0_write_data; x1.write_en = bias_add0.x10_0_0_0_write_en; bias_add0.x10_0_0_0_done = x1.done; diff --git a/frontends/relay-futil/tests/let2.expect b/frontends/relay-futil/tests/let2.expect index b9a9bfd9ec..c4b8afc3cb 100644 --- 
a/frontends/relay-futil/tests/let2.expect +++ b/frontends/relay-futil/tests/let2.expect @@ -68,6 +68,7 @@ component add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_ } } } + component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index 11b79b4180..cbb0783fa8 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -76,6 +76,7 @@ component multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32 } } } + component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); @@ -100,7 +101,7 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, i0.write_en = 1'd1; let0[done] = i0.done; } - group let1<> { + group let1 { bin_read0_0.in = div_pipe0.out; bin_read0_0.write_en = div_pipe0.done; let1[done] = bin_read0_0.done; @@ -152,6 +153,7 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, } } } + component subtract(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); diff --git 
a/frontends/relay-futil/tests/max_pool2d.expect b/frontends/relay-futil/tests/max_pool2d.expect index cbd0da89b2..47decb83ba 100644 --- a/frontends/relay-futil/tests/max_pool2d.expect +++ b/frontends/relay-futil/tests/max_pool2d.expect @@ -10,8 +10,10 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: add5 = prim std_add(32); add6 = prim std_add(32); add7 = prim std_add(32); + b0 = prim std_reg(32); bin_read0_0 = prim std_reg(32); bin_read1_0 = prim std_reg(32); + c0 = prim std_reg(32); const0 = prim std_const(32, 0); const1 = prim std_const(32, 1); const10 = prim std_const(32, 0); @@ -32,13 +34,8 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: const7 = prim std_const(32, 1); const8 = prim std_const(32, 2); const9 = prim std_const(32, 2); - data_read0_0 = prim std_reg(32); - data_read1_0 = prim std_reg(32); + current_0 = prim std_reg(32); gt0 = prim std_gt(32); - i0 = prim std_reg(32); - j0 = prim std_reg(32); - k0 = prim std_reg(32); - l0 = prim std_reg(32); le0 = prim std_le(32); le1 = prim std_le(32); le2 = prim std_le(32); @@ -54,12 +51,8 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: pool_y_0 = prim std_reg(32); slice0 = prim std_slice(32, 2); slice1 = prim std_slice(32, 2); - slice10 = prim std_slice(32, 3); - slice11 = prim std_slice(32, 3); - slice12 = prim std_slice(32, 2); - slice13 = prim std_slice(32, 2); - slice14 = prim std_slice(32, 2); - slice15 = prim std_slice(32, 2); + slice10 = prim std_slice(32, 2); + slice11 = prim std_slice(32, 2); slice2 = prim std_slice(32, 3); slice3 = prim std_slice(32, 3); slice4 = prim std_slice(32, 2); @@ -68,28 +61,30 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: slice7 = prim std_slice(32, 3); slice8 = prim std_slice(32, 2); slice9 = prim std_slice(32, 2); - stride_k_0 = prim std_reg(32); - stride_l_0 = prim std_reg(32); + stride_x_0 = prim std_reg(32); + stride_y_0 = prim 
std_reg(32); + x0 = prim std_reg(32); + y0 = prim std_reg(32); } wires { group cond0<"static"=0> { cond0[done] = 1'd1; - le0.left = i0.out; + le0.left = b0.out; le0.right = const1.out; } group cond1<"static"=0> { cond1[done] = 1'd1; - le1.left = j0.out; + le1.left = c0.out; le1.right = const3.out; } group cond2<"static"=0> { cond2[done] = 1'd1; - le2.left = k0.out; + le2.left = y0.out; le2.right = const5.out; } group cond3<"static"=0> { cond3[done] = 1'd1; - le3.left = l0.out; + le3.left = x0.out; le3.right = const7.out; } group cond4<"static"=0> { @@ -104,68 +99,68 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: } group cond6<"static"=0> { cond6[done] = 1'd1; - gt0.left = data_read0_0.out; + gt0.left = current_0.out; gt0.right = max_0.out; } group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; + b0.in = const0.out; + b0.write_en = 1'd1; + let0[done] = b0.done; } group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; + c0.in = const2.out; + c0.write_en = 1'd1; + let1[done] = c0.done; } group let10<"static"=1> { - pool_x_0.in = add0.out; - pool_x_0.write_en = 1'd1; - let10[done] = pool_x_0.done; - add0.left = stride_k_0.out; + pool_y_0.in = add0.out; + pool_y_0.write_en = 1'd1; + let10[done] = pool_y_0.done; + add0.left = stride_y_0.out; add0.right = m0.out; } group let11<"static"=1> { - pool_y_0.in = add1.out; - pool_y_0.write_en = 1'd1; - let11[done] = pool_y_0.done; - add1.left = stride_l_0.out; + pool_x_0.in = add1.out; + pool_x_0.write_en = 1'd1; + let11[done] = pool_x_0.done; + add1.left = stride_x_0.out; add1.right = n0.out; } group let2<"static"=1> { - k0.in = const4.out; - k0.write_en = 1'd1; - let2[done] = k0.done; + y0.in = const4.out; + y0.write_en = 1'd1; + let2[done] = y0.done; } group let3<"static"=1> { - l0.in = const6.out; - l0.write_en = 1'd1; - let3[done] = l0.done; + x0.in = const6.out; + x0.write_en = 1'd1; + let3[done] = x0.done; } 
group let4<"static"=4> { bin_read0_0.in = mult_pipe0.out; bin_read0_0.write_en = mult_pipe0.done; let4[done] = bin_read0_0.done; - mult_pipe0.left = k0.out; + mult_pipe0.left = y0.out; mult_pipe0.right = const8.out; mult_pipe0.go = !mult_pipe0.done ? 1'd1; } group let5<"static"=1> { - stride_k_0.in = bin_read0_0.out; - stride_k_0.write_en = 1'd1; - let5[done] = stride_k_0.done; + stride_y_0.in = bin_read0_0.out; + stride_y_0.write_en = 1'd1; + let5[done] = stride_y_0.done; } group let6<"static"=4> { bin_read1_0.in = mult_pipe1.out; bin_read1_0.write_en = mult_pipe1.done; let6[done] = bin_read1_0.done; - mult_pipe1.left = l0.out; + mult_pipe1.left = x0.out; mult_pipe1.right = const9.out; mult_pipe1.go = !mult_pipe1.done ? 1'd1; } group let7<"static"=1> { - stride_l_0.in = bin_read1_0.out; - stride_l_0.write_en = 1'd1; - let7[done] = stride_l_0.done; + stride_x_0.in = bin_read1_0.out; + stride_x_0.write_en = 1'd1; + let7[done] = stride_x_0.done; } group let8<"static"=1> { m0.in = const10.out; @@ -180,101 +175,88 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: group upd0<"static"=1> { max_0.write_en = 1'd1; data0_0_0_0_addr3 = slice3.out; - slice3.in = stride_l_0.out; + slice3.in = stride_x_0.out; data0_0_0_0_addr2 = slice2.out; - slice2.in = stride_k_0.out; + slice2.in = stride_y_0.out; data0_0_0_0_addr1 = slice1.out; - slice1.in = j0.out; + slice1.in = c0.out; data0_0_0_0_addr0 = slice0.out; - slice0.in = i0.out; + slice0.in = b0.out; max_0.in = 1'd1 ? data0_0_0_0_read_data; upd0[done] = max_0.done ? 1'd1; } group upd1<"static"=1> { - data_read0_0.write_en = 1'd1; + current_0.write_en = 1'd1; data0_0_0_0_addr3 = slice7.out; - slice7.in = pool_y_0.out; + slice7.in = pool_x_0.out; data0_0_0_0_addr2 = slice6.out; - slice6.in = pool_x_0.out; + slice6.in = pool_y_0.out; data0_0_0_0_addr1 = slice5.out; - slice5.in = j0.out; + slice5.in = c0.out; data0_0_0_0_addr0 = slice4.out; - slice4.in = i0.out; - data_read0_0.in = 1'd1 ? 
data0_0_0_0_read_data; - upd1[done] = data_read0_0.done ? 1'd1; - } - group upd10<"static"=1> { - i0.write_en = 1'd1; - add7.left = i0.out; - add7.right = const19.out; - i0.in = 1'd1 ? add7.out; - upd10[done] = i0.done ? 1'd1; + slice4.in = b0.out; + current_0.in = 1'd1 ? data0_0_0_0_read_data; + upd1[done] = current_0.done ? 1'd1; } group upd2<"static"=1> { - data_read1_0.write_en = 1'd1; - data0_0_0_0_addr3 = slice11.out; - slice11.in = pool_y_0.out; - data0_0_0_0_addr2 = slice10.out; - slice10.in = pool_x_0.out; - data0_0_0_0_addr1 = slice9.out; - slice9.in = j0.out; - data0_0_0_0_addr0 = slice8.out; - slice8.in = i0.out; - data_read1_0.in = 1'd1 ? data0_0_0_0_read_data; - upd2[done] = data_read1_0.done ? 1'd1; - } - group upd3<"static"=1> { max_0.write_en = 1'd1; - max_0.in = 1'd1 ? data_read1_0.out; - upd3[done] = max_0.done ? 1'd1; + max_0.in = 1'd1 ? current_0.out; + upd2[done] = max_0.done ? 1'd1; } - group upd4<"static"=1> { + group upd3<"static"=1> { n0.write_en = 1'd1; add2.left = n0.out; add2.right = const14.out; n0.in = 1'd1 ? add2.out; - upd4[done] = n0.done ? 1'd1; + upd3[done] = n0.done ? 1'd1; } - group upd5<"static"=1> { + group upd4<"static"=1> { m0.write_en = 1'd1; add3.left = m0.out; add3.right = const15.out; m0.in = 1'd1 ? add3.out; - upd5[done] = m0.done ? 1'd1; + upd4[done] = m0.done ? 1'd1; } - group upd6<"static"=1> { - result0_0_0_0_addr3 = slice15.out; - slice15.in = l0.out; - result0_0_0_0_addr2 = slice14.out; - slice14.in = k0.out; - result0_0_0_0_addr1 = slice13.out; - slice13.in = j0.out; - result0_0_0_0_addr0 = slice12.out; - slice12.in = i0.out; + group upd5<"static"=1> { + result0_0_0_0_addr3 = slice11.out; + slice11.in = x0.out; + result0_0_0_0_addr2 = slice10.out; + slice10.in = y0.out; + result0_0_0_0_addr1 = slice9.out; + slice9.in = c0.out; + result0_0_0_0_addr0 = slice8.out; + slice8.in = b0.out; result0_0_0_0_write_en = 1'd1; result0_0_0_0_write_data = 1'd1 ? max_0.out; - upd6[done] = result0_0_0_0_done ? 
1'd1; + upd5[done] = result0_0_0_0_done ? 1'd1; } - group upd7<"static"=1> { - l0.write_en = 1'd1; - add4.left = l0.out; + group upd6<"static"=1> { + x0.write_en = 1'd1; + add4.left = x0.out; add4.right = const16.out; - l0.in = 1'd1 ? add4.out; - upd7[done] = l0.done ? 1'd1; + x0.in = 1'd1 ? add4.out; + upd6[done] = x0.done ? 1'd1; } - group upd8<"static"=1> { - k0.write_en = 1'd1; - add5.left = k0.out; + group upd7<"static"=1> { + y0.write_en = 1'd1; + add5.left = y0.out; add5.right = const17.out; - k0.in = 1'd1 ? add5.out; - upd8[done] = k0.done ? 1'd1; + y0.in = 1'd1 ? add5.out; + upd7[done] = y0.done ? 1'd1; } - group upd9<"static"=1> { - j0.write_en = 1'd1; - add6.left = j0.out; + group upd8<"static"=1> { + c0.write_en = 1'd1; + add6.left = c0.out; add6.right = const18.out; - j0.in = 1'd1 ? add6.out; - upd9[done] = j0.done ? 1'd1; + c0.in = 1'd1 ? add6.out; + upd8[done] = c0.done ? 1'd1; + } + group upd9<"static"=1> { + b0.write_en = 1'd1; + add7.left = b0.out; + add7.right = const19.out; + b0.in = 1'd1 ? add7.out; + upd9[done] = b0.done ? 
1'd1; } } @@ -303,44 +285,37 @@ component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: } } upd0; - par { + let8; + while le4.out with cond4 { seq { - let8; - while le4.out with cond4 { + let9; + while le5.out with cond5 { seq { - let9; - while le5.out with cond5 { - seq { - par { - let10; - let11; - } - upd1; - if gt0.out with cond6 { - seq { - upd2; - upd3; - } - } - upd4; - } + par { + let10; + let11; + } + upd1; + if gt0.out with cond6 { + upd2; } - upd5; + upd3; } } + upd4; } - upd6; } - upd7; + upd5; + upd6; } } - upd8; + upd7; } } - upd9; + upd8; } } - upd10; + upd9; } } } diff --git a/frontends/relay-futil/tests/mlp_net.expect b/frontends/relay-futil/tests/mlp_net.expect index b780037235..9f7c781f88 100644 --- a/frontends/relay-futil/tests/mlp_net.expect +++ b/frontends/relay-futil/tests/mlp_net.expect @@ -30,6 +30,7 @@ component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_ x8_expsum_0 = prim std_reg(32); x8_read0_0 = prim std_reg(32); x8_read1_0 = prim std_reg(32); + x9_read0_0 = prim std_reg(32); } wires { group cond0<"static"=0> { @@ -72,11 +73,9 @@ component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_ bin_read0_0.write_en = 1'd1; let4[done] = bin_read0_0.done; slice0.in = div_pipe0.out; - div_pipe0.left = exp1.out; + div_pipe0.left = x9_read0_0.out; div_pipe0.right = x8_expsum_0.out; div_pipe0.go = !div_pipe0.done ? 1'd1; - exp1.exponent = x8_read1_0.out; - exp1.go = !exp1.done ? 1'd1; } group upd0<"static"=1> { x8_read0_0.write_en = 1'd1; @@ -85,7 +84,7 @@ component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_ x8_read0_0.in = 1'd1 ? x80_0_read_data; upd0[done] = x8_read0_0.done ? 1'd1; } - group upd1<"static"=2> { + group upd1 { x8_expsum_0.write_en = 1'd1; add0.left = x8_expsum_0.out; add0.right = exp0.out; @@ -108,26 +107,42 @@ component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_ x8_read1_0.in = 1'd1 ? 
x80_0_read_data; upd3[done] = x8_read1_0.done ? 1'd1; } - group upd4<"static"=1> { + group upd4 { x90_0_addr1 = k0.out; x90_0_addr0 = i0.out; - x90_0_write_en = 1'd1; - x90_0_write_data = 1'd1 ? bin_read0_0.out; + x90_0_write_en = exp1.done; + exp1.exponent = x8_read1_0.out; + exp1.go = !exp1.done ? 1'd1; + x90_0_write_data = exp1.done ? exp1.out; upd4[done] = x90_0_done ? 1'd1; } group upd5<"static"=1> { + x9_read0_0.write_en = 1'd1; + x90_0_addr1 = k0.out; + x90_0_addr0 = i0.out; + x9_read0_0.in = 1'd1 ? x90_0_read_data; + upd5[done] = x9_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + x90_0_addr1 = k0.out; + x90_0_addr0 = i0.out; + x90_0_write_en = 1'd1; + x90_0_write_data = 1'd1 ? bin_read0_0.out; + upd6[done] = x90_0_done ? 1'd1; + } + group upd7<"static"=1> { k0.write_en = 1'd1; add2.left = k0.out; add2.right = const7.out; k0.in = 1'd1 ? add2.out; - upd5[done] = k0.done ? 1'd1; + upd7[done] = k0.done ? 1'd1; } - group upd6<"static"=1> { + group upd8<"static"=1> { i0.write_en = 1'd1; add3.left = i0.out; add3.right = const8.out; i0.in = 1'd1 ? add3.out; - upd6[done] = i0.done ? 1'd1; + upd8[done] = i0.done ? 
1'd1; } } @@ -153,17 +168,20 @@ component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_ while le2.out with cond2 { seq { upd3; - let4; upd4; upd5; + let4; + upd6; + upd7; } } - upd6; + upd8; } } } } } + component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x70_0_read_data: 32, x70_0_done: 1, x80_0_read_data: 32, x80_0_done: 1) -> (done: 1, fc3_bias0_addr0: 4, fc3_bias0_write_data: 32, fc3_bias0_write_en: 1, fc3_bias0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); @@ -263,6 +281,7 @@ component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x } } } + component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: 1, x60_0_read_data: 32, x60_0_done: 1, x70_0_read_data: 32, x70_0_done: 1) -> (done: 1, fc3_weight0_0_addr0: 4, fc3_weight0_0_addr1: 7, fc3_weight0_0_write_data: 32, fc3_weight0_0_write_en: 1, fc3_weight0_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1) { cells { add0 = prim std_add(7); @@ -568,6 +587,7 @@ component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: } } } + component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_data: 32, x60_0_done: 1) -> (done: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1) { cells { add0 = prim std_add(7); @@ -690,6 +710,7 @@ component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_da } } } + component bias_add1(go: 1, clk: 1, fc2_bias0_read_data: 32, fc2_bias0_done: 1, x40_0_read_data: 32, x40_0_done: 1, x50_0_read_data: 32, 
x50_0_done: 1) -> (done: 1, fc2_bias0_addr0: 7, fc2_bias0_write_data: 32, fc2_bias0_write_en: 1, fc2_bias0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); @@ -789,6 +810,7 @@ component bias_add1(go: 1, clk: 1, fc2_bias0_read_data: 32, fc2_bias0_done: 1, x } } } + component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: 1, x30_0_read_data: 32, x30_0_done: 1, x40_0_read_data: 32, x40_0_done: 1) -> (done: 1, fc2_weight0_0_addr0: 7, fc2_weight0_0_addr1: 8, fc2_weight0_0_write_data: 32, fc2_weight0_0_write_en: 1, fc2_weight0_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1) { cells { add0 = prim std_add(8); @@ -1094,6 +1116,7 @@ component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: } } } + component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_data: 32, x30_0_done: 1) -> (done: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1) { cells { add0 = prim std_add(8); @@ -1216,6 +1239,7 @@ component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_dat } } } + component bias_add(go: 1, clk: 1, fc1_bias0_read_data: 32, fc1_bias0_done: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_read_data: 32, x20_0_done: 1) -> (done: 1, fc1_bias0_addr0: 8, fc1_bias0_write_data: 32, fc1_bias0_write_en: 1, fc1_bias0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); @@ -1315,6 +1339,7 @@ 
component bias_add(go: 1, clk: 1, fc1_bias0_read_data: 32, fc1_bias0_done: 1, x1 } } } + component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, fc1_weight0_0_addr0: 8, fc1_weight0_0_addr1: 10, fc1_weight0_0_write_data: 32, fc1_weight0_0_write_en: 1, fc1_weight0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { cells { add0 = prim std_add(10); @@ -1620,6 +1645,7 @@ component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: } } } + component batch_flatten(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 1, data0_0_0_0_addr1: 1, data0_0_0_0_addr2: 5, data0_0_0_0_addr3: 5, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1) { cells { add0 = prim std_add(10); @@ -1821,6 +1847,7 @@ component main () -> () { batch_flatten0.data0_0_0_0_read_data = data.read_data; data.addr1 = batch_flatten0.data0_0_0_0_addr1; data.addr2 = batch_flatten0.data0_0_0_0_addr2; + data.addr3 = batch_flatten0.data0_0_0_0_addr3; x.addr0 = batch_flatten0.x0_0_addr0; x.addr1 = batch_flatten0.x0_0_addr1; x.write_data = batch_flatten0.x0_0_write_data; diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect index 74b5646d9b..7a65c37f5a 100644 --- a/frontends/relay-futil/tests/relu.expect +++ b/frontends/relay-futil/tests/relu.expect @@ -201,7 +201,11 @@ component main () -> () { relu0.x0_0_0_0_read_data = x.read_data; x.addr1 = relu0.x0_0_0_0_addr1; x.addr2 = relu0.x0_0_0_0_addr2; + x.addr3 = relu0.x0_0_0_0_addr3; x1.addr0 = relu0.x10_0_0_0_addr0; + x1.addr1 = relu0.x10_0_0_0_addr1; + x1.addr2 = relu0.x10_0_0_0_addr2; + 
x1.addr3 = relu0.x10_0_0_0_addr3; x1.write_data = relu0.x10_0_0_0_write_data; x1.write_en = relu0.x10_0_0_0_write_en; relu0.x10_0_0_0_done = x1.done; diff --git a/frontends/relay-futil/tests/sqrt.expect b/frontends/relay-futil/tests/sqrt.expect index edb40c6259..2963943f4f 100644 --- a/frontends/relay-futil/tests/sqrt.expect +++ b/frontends/relay-futil/tests/sqrt.expect @@ -164,7 +164,11 @@ component main () -> () { sqrt0.x0_0_0_0_read_data = x.read_data; x.addr1 = sqrt0.x0_0_0_0_addr1; x.addr2 = sqrt0.x0_0_0_0_addr2; + x.addr3 = sqrt0.x0_0_0_0_addr3; x1.addr0 = sqrt0.x10_0_0_0_addr0; + x1.addr1 = sqrt0.x10_0_0_0_addr1; + x1.addr2 = sqrt0.x10_0_0_0_addr2; + x1.addr3 = sqrt0.x10_0_0_0_addr3; x1.write_data = sqrt0.x10_0_0_0_write_data; x1.write_en = sqrt0.x10_0_0_0_write_en; sqrt0.x10_0_0_0_done = x1.done; diff --git a/frontends/relay-futil/tests/tensor3d_divide.expect b/frontends/relay-futil/tests/tensor3d_divide.expect index 5058296dd8..a823a0ff96 100644 --- a/frontends/relay-futil/tests/tensor3d_divide.expect +++ b/frontends/relay-futil/tests/tensor3d_divide.expect @@ -56,7 +56,7 @@ component divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_re k0.write_en = 1'd1; let2[done] = k0.done; } - group let3<> { + group let3 { bin_read0_0.in = div_pipe0.out; bin_read0_0.write_en = div_pipe0.done; let3[done] = bin_read0_0.done; diff --git a/fud/fud/main.py b/fud/fud/main.py index 8d4cd2687c..6f2ba5810c 100644 --- a/fud/fud/main.py +++ b/fud/fud/main.py @@ -36,6 +36,10 @@ def register_stages(registry, config): futil.FutilStage(config, 'futil-noinline', '-b futil -d hole-inliner', 'Compile FuTIL to FuTIL to remove all control and inline groups')) + registry.register( + futil.FutilStage(config, 'futil-externalize', '-b futil -p externalize', + 'Compile FuTIL to FuTIL to externalize all external memories primitives')) + # Verilator registry.register( verilator.VerilatorStage(config, 'vcd', From fd3f133e3082f0637be37797e10995c5df2ad2a5 Mon Sep 17 00:00:00 2001 
From: cgyurgyik Date: Thu, 3 Dec 2020 21:49:00 -0500 Subject: [PATCH 66/75] Initial conv2d commit. --- frontends/relay-futil/compiler.py | 2 +- frontends/relay-futil/dahlia_functions.py | 54 ++++++++++++++++--- frontends/relay-futil/example.py | 8 ++- frontends/relay-futil/tests/conv2d.expect | 0 frontends/relay-futil/tests/conv2d.relay | 6 +++ .../relay-futil/tests/data/conv2d.expect | 0 frontends/relay-futil/tests/data/conv2d.relay | 5 ++ .../relay-futil/tests/data/conv2d.relay.data | 15 ++++++ 8 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 frontends/relay-futil/tests/conv2d.expect create mode 100644 frontends/relay-futil/tests/conv2d.relay create mode 100644 frontends/relay-futil/tests/data/conv2d.expect create mode 100644 frontends/relay-futil/tests/data/conv2d.relay create mode 100644 frontends/relay-futil/tests/data/conv2d.relay.data diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index a6cd71c690..4b8b5c22e8 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -15,7 +15,7 @@ # Mapping from Relay function names to their respective Dahlia lowering. RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, 'nn.bias_add': bias_add, 'nn.relu': relu, 'nn.softmax': softmax, 'nn.max_pool2d': max_pool2d, - 'negative': negative, 'expand_dims': expand_dims, 'sqrt': sqrt} + 'nn.conv2d': conv2d, 'negative': negative, 'expand_dims': expand_dims, 'sqrt': sqrt} # Mapping between primitive type and associated Dahlia name extension. # E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 14dc98ddd4..15cc65e38b 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -300,8 +300,6 @@ def batch_matmul(declaration): # of the matrix multiply. 
Otherwise, the values aren't computed properly. Look deeper into this. def dense(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.dense""" - # TODO(cgyurgyik): Add support for `units`. - units = declaration.attributes.get_int("units") op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive bitwidth, M1_size0, M1_size1 = op1.data[0], op1.data[1], op1.data[2] M1_index_size0, M1_index_size1 = op1.data[3], op1.data[4] @@ -352,7 +350,9 @@ def softmax(declaration): body = f""" for (let i: ubit<{index_size0}> = 0..{size0}) {{ let {op.name}_expsum: {data_type}<{bitwidth}> = {zero}; - for (let j: ubit<{index_size1}> = 0..{size1}) {{ {op.name}_expsum += exp({op.name}[i][j]); }} + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + {op.name}_expsum += exp({op.name}[i][j]); + }} for (let k: ubit<{index_size1}> = 0..{size1}) {{ {res.name}[i][k] := exp({op.name}[i][k]); --- @@ -370,10 +370,8 @@ def max_pool2d(declaration): strides = declaration.attributes.get_int_tuple("strides") pool_size = declaration.attributes.get_int_tuple("pool_size") - padding = declaration.attributes.get_int_tuple("padding") layout = declaration.attributes.get_str("layout") ceil_mode = declaration.attributes.get_int("ceil_mode") - for p in padding: assert p == 0, f"Non-zero padding: {padding} is not currently supported for nn.max_pool2d" assert layout == 'NCHW', f"Layout \'{layout}\' is not currently supported for nn.max_pool2d; please use `NCHW`" assert ceil_mode == False, "`ceil_mode` is not currently supported for nn.max_pool2d" bitwidth, data_type = data.data[0], data.data_type @@ -385,8 +383,8 @@ def max_pool2d(declaration): for (let c: ubit<32> = 0..{size1}) {{ for (let y: ubit<32> = 0..{size2}) {{ for (let x: ubit<32> = 0..{size3}) {{ - let stride_y: ubit<32> = y * {strides[1]}/*strides[1]*/; - let stride_x: ubit<32> = x * {strides[0]}/*strides[0]*/; + let stride_y: ubit<32> = y * {strides[0]}/*strides[0]*/; + 
let stride_x: ubit<32> = x * {strides[1]}/*strides[1]*/; let max: {data_type}<{bitwidth}> = {data.name}[b][c][stride_y][stride_x]; for (let m: ubit<32> = 0..{pool_size[0]}/*pool_size[0]*/) {{ @@ -405,3 +403,45 @@ def max_pool2d(declaration): """ program = f"""{declarations}{NEWL}{program_body}""" return lower_dahlia_program(program, declaration.component_name) + + +# Only supports a small subset of the `conv2d` function. For example, +# dilation and grouped convlution are not supported. +def conv2d(declaration): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.conv2d""" + data, weight, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + + strides = declaration.attributes.get_int_tuple("strides") + kernel_size = declaration.attributes.get_int_tuple("kernel_size") + channels = declaration.attributes.get_int("channels") + bitwidth, data_type = data.data[0], data.data_type + size0, size1, size2, size3 = res.data[1], res.data[2], res.data[3], res.data[4] + + declarations = pp_dahlia_memory_declarations([res, data, weight]) + + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + program_body = f""" + for (let b: ubit<32> = 0..{size0}) {{ + for (let c: ubit<32> = 0..{size1}) {{ + for (let y: ubit<32> = 0..{size2}) {{ + for (let x: ubit<32> = 0..{size3}) {{ + let weighted_sum: {data_type}<{bitwidth}> = {zero}; + + for (let k: ubit<32> = 0..{channels}) {{ + for (let dy: ubit<32> = 0..{kernel_size[1]}/*kernel_size[1]*/) {{ + for (let dx: ubit<32> = 0..{kernel_size[0]}/*kernel_size[0]*/) {{ + let kernel_y: ubit<32> = /*strides[0]*/{strides[0]} * y + dy; + let kernel_x: ubit<32> = /*strides[1]*/{strides[1]} * x + dx; + weighted_sum += {data.name}[b][k][kernel_y][kernel_x] * {weight.name}[c][k][dy][dx]; + }} + }} + }} + {res.name}[b][c][y][x] := weighted_sum; + }} + }} + }} + }} + """ + program = f"""{declarations}{NEWL}{program_body}""" + + return lower_dahlia_program(program, 
declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index ca25a9bebd..34dc9120ee 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -59,6 +59,12 @@ def max_pool2d(): return relay.Function([data], relay.nn.max_pool2d(data, padding=[0, 0, 0, 0], strides=[2, 2], pool_size=[2, 2])) +def conv2d(): + d = relay.var('data', shape=[1,2,2,2], dtype='int32') + w = relay.var('weight', shape=[1,2,2,2], dtype='int32') + return relay.Function([d, w], relay.nn.conv2d(d, w, padding=[1, 1, 1, 1], channels=1, kernel_size=[2,2])) + + def mlp_net(): """The MLP test from Relay.""" from tvm.relay.testing import mlp @@ -73,7 +79,7 @@ def vgg_net(): ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, - bias_add, relu, dense, softmax, mlp_net, vgg_net, max_pool2d] + bias_add, relu, dense, softmax, conv2d, max_pool2d, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/tests/conv2d.expect b/frontends/relay-futil/tests/conv2d.expect new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontends/relay-futil/tests/conv2d.relay b/frontends/relay-futil/tests/conv2d.relay new file mode 100644 index 0000000000..f59a6cef8f --- /dev/null +++ b/frontends/relay-futil/tests/conv2d.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%data: Tensor[(5, 512, 14, 14), int32], %weight: Tensor[(512, 512, 3, 3), int32]) -> Tensor[(5, 512, 14, 14), int32] { + let %x: Tensor[(5, 512, 14, 14), int32] = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=512, kernel_size=[3, 3]) /* ty=Tensor[(5, 512, 14, 14), int32] */; + %x +} + diff --git a/frontends/relay-futil/tests/data/conv2d.expect b/frontends/relay-futil/tests/data/conv2d.expect new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontends/relay-futil/tests/data/conv2d.relay b/frontends/relay-futil/tests/data/conv2d.relay new file mode 100644 index 0000000000..d85f4aae0e --- 
/dev/null +++ b/frontends/relay-futil/tests/data/conv2d.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%data: Tensor[(2, 2, 2, 2), int32], %weight: Tensor[(2, 2, 2, 2), int32]) { + let %x = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=2, kernel_size=[2, 2]); + %x +} diff --git a/frontends/relay-futil/tests/data/conv2d.relay.data b/frontends/relay-futil/tests/data/conv2d.relay.data new file mode 100644 index 0000000000..c149ae31ea --- /dev/null +++ b/frontends/relay-futil/tests/data/conv2d.relay.data @@ -0,0 +1,15 @@ +{ + "data": { + "data": [ [[[1,0], [0,11]], [[10,4], [11,14]]], [[[10,100], [0,0]], [[0,10], [0,4]]] ], + "bitwidth": 32 + }, + "weight": { + "data": [ [[[2,1], [3,4]], [[5,5], [0,3]]], [[[2,1], [4,4]], [[19,0], [20,0]]] ], + "bitwidth": 32 + }, + "x": { + "data": [ [[[0,0,0], [0,0,0], [0,0,0]], [[0,0,0], [0,0,0], [0,0,0]]], + [[[0,0,0], [0,0,0], [0,0,0]], [[0,0,0], [0,0,0], [0,0,0]]] ], + "bitwidth": 32 + } +} \ No newline at end of file From 2bc240eee34d886495f0248bc24c15fc613ccae1 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 3 Dec 2020 21:51:39 -0500 Subject: [PATCH 67/75] Update conv2d expect. 
--- frontends/relay-futil/tests/conv2d.expect | 395 ++++++++++++++++++++++ frontends/relay-futil/tests/conv2d.relay | 4 +- 2 files changed, 397 insertions(+), 2 deletions(-) diff --git a/frontends/relay-futil/tests/conv2d.expect b/frontends/relay-futil/tests/conv2d.expect index e69de29bb2..a9fa6b4dfd 100644 --- a/frontends/relay-futil/tests/conv2d.expect +++ b/frontends/relay-futil/tests/conv2d.expect @@ -0,0 +1,395 @@ +import "primitives/std.lib"; + +component conv2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, weight0_0_0_0_read_data: 32, weight0_0_0_0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 3, data0_0_0_0_addr1: 10, data0_0_0_0_addr2: 4, data0_0_0_0_addr3: 4, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, weight0_0_0_0_addr0: 10, weight0_0_0_0_addr1: 10, weight0_0_0_0_addr2: 2, weight0_0_0_0_addr3: 2, weight0_0_0_0_write_data: 32, weight0_0_0_0_write_en: 1, weight0_0_0_0_clk: 1, x0_0_0_0_addr0: 3, x0_0_0_0_addr1: 10, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 4, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(32); + add2 = prim fixed_p_std_add(32, 16, 16); + add3 = prim std_add(32); + add4 = prim std_add(32); + add5 = prim std_add(32); + add6 = prim std_add(32); + add7 = prim std_add(32); + add8 = prim std_add(32); + add9 = prim std_add(32); + b0 = prim std_reg(32); + bin_read0_0 = prim std_reg(32); + bin_read1_0 = prim std_reg(32); + bin_read2_0 = prim std_reg(32); + c0 = prim std_reg(32); + const0 = prim std_const(32, 0); + const1 = prim std_const(32, 4); + const10 = prim std_const(32, 0); + const11 = prim std_const(32, 2); + const12 = prim std_const(32, 0); + const13 = prim std_const(32, 2); + const14 = prim std_const(32, 1); + const15 = prim std_const(32, 1); + const16 = prim std_const(32, 1); + const17 = prim std_const(32, 1); + const18 = prim std_const(32, 1); + const19 = prim std_const(32, 1); + const2 
= prim std_const(32, 0); + const20 = prim std_const(32, 1); + const21 = prim std_const(32, 1); + const22 = prim std_const(32, 1); + const3 = prim std_const(32, 511); + const4 = prim std_const(32, 0); + const5 = prim std_const(32, 13); + const6 = prim std_const(32, 0); + const7 = prim std_const(32, 13); + const8 = prim std_const(32, 0); + const9 = prim std_const(32, 511); + data_read0_0 = prim std_reg(32); + dx0 = prim std_reg(32); + dy0 = prim std_reg(32); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + k0 = prim std_reg(32); + kernel_x_0 = prim std_reg(32); + kernel_y_0 = prim std_reg(32); + le0 = prim std_le(32); + le1 = prim std_le(32); + le2 = prim std_le(32); + le3 = prim std_le(32); + le4 = prim std_le(32); + le5 = prim std_le(32); + le6 = prim std_le(32); + mult_pipe0 = prim std_mult_pipe(32); + mult_pipe1 = prim std_mult_pipe(32); + mult_pipe2 = prim std_mult_pipe(32); + slice0 = prim std_slice(32, 3); + slice1 = prim std_slice(32, 10); + slice10 = prim std_slice(32, 10); + slice11 = prim std_slice(32, 4); + slice12 = prim std_slice(32, 4); + slice2 = prim std_slice(32, 4); + slice3 = prim std_slice(32, 4); + slice4 = prim std_slice(32, 10); + slice5 = prim std_slice(32, 10); + slice6 = prim std_slice(32, 2); + slice7 = prim std_slice(32, 2); + slice8 = prim std_slice(32, 32); + slice9 = prim std_slice(32, 3); + weight_read0_0 = prim std_reg(32); + weighted_sum_0 = prim std_reg(32); + x0 = prim std_reg(32); + y0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = b0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = c0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = y0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = x0.out; + le3.right = const7.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = k0.out; + le4.right = const9.out; + } + group 
cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = dy0.out; + le5.right = const11.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = dx0.out; + le6.right = const13.out; + } + group let0<"static"=1> { + b0.in = const0.out; + b0.write_en = 1'd1; + let0[done] = b0.done; + } + group let1<"static"=1> { + c0.in = const2.out; + c0.write_en = 1'd1; + let1[done] = c0.done; + } + group let10<"static"=4> { + bin_read1_0.in = mult_pipe1.out; + bin_read1_0.write_en = mult_pipe1.done; + let10[done] = bin_read1_0.done; + mult_pipe1.left = const15.out; + mult_pipe1.right = x0.out; + mult_pipe1.go = !mult_pipe1.done ? 1'd1; + } + group let11<"static"=1> { + kernel_x_0.in = add1.out; + kernel_x_0.write_en = 1'd1; + let11[done] = kernel_x_0.done; + add1.left = bin_read1_0.out; + add1.right = dx0.out; + } + group let12<"static"=1> { + bin_read2_0.in = slice8.out; + bin_read2_0.write_en = 1'd1; + let12[done] = bin_read2_0.done; + slice8.in = mult_pipe2.out; + mult_pipe2.left = data_read0_0.out; + mult_pipe2.right = weight_read0_0.out; + mult_pipe2.go = !mult_pipe2.done ? 1'd1; + } + group let2<"static"=1> { + y0.in = const4.out; + y0.write_en = 1'd1; + let2[done] = y0.done; + } + group let3<"static"=1> { + x0.in = const6.out; + x0.write_en = 1'd1; + let3[done] = x0.done; + } + group let4<"static"=1> { + weighted_sum_0.in = fpconst0.out; + weighted_sum_0.write_en = 1'd1; + let4[done] = weighted_sum_0.done; + } + group let5<"static"=1> { + k0.in = const8.out; + k0.write_en = 1'd1; + let5[done] = k0.done; + } + group let6<"static"=1> { + dy0.in = const10.out; + dy0.write_en = 1'd1; + let6[done] = dy0.done; + } + group let7<"static"=1> { + dx0.in = const12.out; + dx0.write_en = 1'd1; + let7[done] = dx0.done; + } + group let8<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let8[done] = bin_read0_0.done; + mult_pipe0.left = const14.out; + mult_pipe0.right = y0.out; + mult_pipe0.go = !mult_pipe0.done ? 
1'd1; + } + group let9<"static"=1> { + kernel_y_0.in = add0.out; + kernel_y_0.write_en = 1'd1; + let9[done] = kernel_y_0.done; + add0.left = bin_read0_0.out; + add0.right = dy0.out; + } + group upd0<"static"=1> { + data_read0_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice3.out; + slice3.in = kernel_x_0.out; + data0_0_0_0_addr2 = slice2.out; + slice2.in = kernel_y_0.out; + data0_0_0_0_addr1 = slice1.out; + slice1.in = k0.out; + data0_0_0_0_addr0 = slice0.out; + slice0.in = b0.out; + data_read0_0.in = 1'd1 ? data0_0_0_0_read_data; + upd0[done] = data_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + weight_read0_0.write_en = 1'd1; + weight0_0_0_0_addr3 = slice7.out; + slice7.in = dx0.out; + weight0_0_0_0_addr2 = slice6.out; + slice6.in = dy0.out; + weight0_0_0_0_addr1 = slice5.out; + slice5.in = k0.out; + weight0_0_0_0_addr0 = slice4.out; + slice4.in = c0.out; + weight_read0_0.in = 1'd1 ? weight0_0_0_0_read_data; + upd1[done] = weight_read0_0.done ? 1'd1; + } + group upd10<"static"=1> { + b0.write_en = 1'd1; + add9.left = b0.out; + add9.right = const22.out; + b0.in = 1'd1 ? add9.out; + upd10[done] = b0.done ? 1'd1; + } + group upd2<"static"=1> { + weighted_sum_0.write_en = 1'd1; + add2.left = weighted_sum_0.out; + add2.right = bin_read2_0.out; + weighted_sum_0.in = 1'd1 ? add2.out; + upd2[done] = weighted_sum_0.done ? 1'd1; + } + group upd3<"static"=1> { + dx0.write_en = 1'd1; + add3.left = dx0.out; + add3.right = const16.out; + dx0.in = 1'd1 ? add3.out; + upd3[done] = dx0.done ? 1'd1; + } + group upd4<"static"=1> { + dy0.write_en = 1'd1; + add4.left = dy0.out; + add4.right = const17.out; + dy0.in = 1'd1 ? add4.out; + upd4[done] = dy0.done ? 1'd1; + } + group upd5<"static"=1> { + k0.write_en = 1'd1; + add5.left = k0.out; + add5.right = const18.out; + k0.in = 1'd1 ? add5.out; + upd5[done] = k0.done ? 
1'd1; + } + group upd6<"static"=1> { + x0_0_0_0_addr3 = slice12.out; + slice12.in = x0.out; + x0_0_0_0_addr2 = slice11.out; + slice11.in = y0.out; + x0_0_0_0_addr1 = slice10.out; + slice10.in = c0.out; + x0_0_0_0_addr0 = slice9.out; + slice9.in = b0.out; + x0_0_0_0_write_en = 1'd1; + x0_0_0_0_write_data = 1'd1 ? weighted_sum_0.out; + upd6[done] = x0_0_0_0_done ? 1'd1; + } + group upd7<"static"=1> { + x0.write_en = 1'd1; + add6.left = x0.out; + add6.right = const19.out; + x0.in = 1'd1 ? add6.out; + upd7[done] = x0.done ? 1'd1; + } + group upd8<"static"=1> { + y0.write_en = 1'd1; + add7.left = y0.out; + add7.right = const20.out; + y0.in = 1'd1 ? add7.out; + upd8[done] = y0.done ? 1'd1; + } + group upd9<"static"=1> { + c0.write_en = 1'd1; + add8.left = c0.out; + add8.right = const21.out; + c0.in = 1'd1 ? add8.out; + upd9[done] = c0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + par { + let4; + seq { + let5; + while le4.out with cond4 { + seq { + let6; + while le5.out with cond5 { + seq { + let7; + while le6.out with cond6 { + seq { + par { + seq { + let8; + let9; + } + seq { + let10; + let11; + } + } + par { + upd0; + upd1; + } + let12; + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + } + upd6; + upd7; + } + } + upd8; + } + } + upd9; + } + } + upd10; + } + } + } + } +} + +component main () -> () { + cells { + x = prim std_mem_d4(32, 5, 512, 14, 14, 3, 10, 4, 4); + data = prim std_mem_d4(32, 5, 512, 14, 14, 3, 10, 4, 4); + weight = prim std_mem_d4(32, 512, 512, 3, 3, 10, 10, 2, 2); + conv2d0 = conv2d; + } + wires { + group run_conv2d { + data.addr0 = conv2d0.data0_0_0_0_addr0; + conv2d0.data0_0_0_0_read_data = data.read_data; + data.addr1 = conv2d0.data0_0_0_0_addr1; + data.addr2 = conv2d0.data0_0_0_0_addr2; + data.addr3 = conv2d0.data0_0_0_0_addr3; + weight.addr0 = 
conv2d0.weight0_0_0_0_addr0; + conv2d0.weight0_0_0_0_read_data = weight.read_data; + weight.addr1 = conv2d0.weight0_0_0_0_addr1; + weight.addr2 = conv2d0.weight0_0_0_0_addr2; + weight.addr3 = conv2d0.weight0_0_0_0_addr3; + x.addr0 = conv2d0.x0_0_0_0_addr0; + x.addr1 = conv2d0.x0_0_0_0_addr1; + x.addr2 = conv2d0.x0_0_0_0_addr2; + x.addr3 = conv2d0.x0_0_0_0_addr3; + x.write_data = conv2d0.x0_0_0_0_write_data; + x.write_en = conv2d0.x0_0_0_0_write_en; + conv2d0.x0_0_0_0_done = x.done; + conv2d0.go = 1'd1; + run_conv2d[done] = conv2d0.done ? 1'd1; + } + } + control { + seq { + run_conv2d; + } + } +} diff --git a/frontends/relay-futil/tests/conv2d.relay b/frontends/relay-futil/tests/conv2d.relay index f59a6cef8f..e759bab61a 100644 --- a/frontends/relay-futil/tests/conv2d.relay +++ b/frontends/relay-futil/tests/conv2d.relay @@ -1,6 +1,6 @@ v0.0.4 -fn (%data: Tensor[(5, 512, 14, 14), int32], %weight: Tensor[(512, 512, 3, 3), int32]) -> Tensor[(5, 512, 14, 14), int32] { - let %x: Tensor[(5, 512, 14, 14), int32] = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=512, kernel_size=[3, 3]) /* ty=Tensor[(5, 512, 14, 14), int32] */; +fn (%data: Tensor[(5, 512, 14, 14), float32], %weight: Tensor[(512, 512, 3, 3), float32]) -> Tensor[(5, 512, 14, 14), float32] { + let %x: Tensor[(5, 512, 14, 14), float32] = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=512, kernel_size=[3, 3]) /* ty=Tensor[(5, 512, 14, 14), float32] */; %x } From 839a40e6d0d4ba044150b22a24e1f2a34a28f4f5 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 4 Dec 2020 12:34:29 -0500 Subject: [PATCH 68/75] Singular. 
--- fud/fud/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fud/fud/main.py b/fud/fud/main.py index 6f2ba5810c..04fdee2709 100644 --- a/fud/fud/main.py +++ b/fud/fud/main.py @@ -38,7 +38,7 @@ def register_stages(registry, config): registry.register( futil.FutilStage(config, 'futil-externalize', '-b futil -p externalize', - 'Compile FuTIL to FuTIL to externalize all external memories primitives')) + 'Compile FuTIL to FuTIL to externalize all external memory primitives')) # Verilator registry.register( From 3dc4b291c5892f9825b3db3c6b22a573f988281d Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 5 Dec 2020 14:52:07 -0500 Subject: [PATCH 69/75] conv2d. --- frontends/relay-futil/dahlia_functions.py | 19 ++- frontends/relay-futil/example.py | 6 +- frontends/relay-futil/tests/conv2d.expect | 18 +-- .../relay-futil/tests/data/conv2d.expect | 120 ++++++++++++++++++ frontends/relay-futil/tests/data/conv2d.relay | 2 +- .../relay-futil/tests/data/conv2d.relay.data | 7 +- 6 files changed, 145 insertions(+), 27 deletions(-) diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 15cc65e38b..1822c45c4f 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -191,7 +191,7 @@ def relu(declaration): def negative(declaration): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" op, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = op.data[0], op.type + bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type indices = "" variable_name = CHARACTER_I @@ -201,7 +201,8 @@ def negative(declaration): variable_name = next_character(variable_name) declarations = pp_dahlia_memory_declarations([op, res]) - program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := -{op.name}{indices};""") + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + program_body = 
pp_dahlia_loop(op, f"""{res.name}{indices} := {zero} - {op.name}{indices};""") return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) @@ -406,7 +407,7 @@ def max_pool2d(declaration): # Only supports a small subset of the `conv2d` function. For example, -# dilation and grouped convlution are not supported. +# dilation and grouped convolution are not supported. def conv2d(declaration): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.conv2d""" data, weight, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive @@ -425,23 +426,21 @@ def conv2d(declaration): for (let c: ubit<32> = 0..{size1}) {{ for (let y: ubit<32> = 0..{size2}) {{ for (let x: ubit<32> = 0..{size3}) {{ - let weighted_sum: {data_type}<{bitwidth}> = {zero}; + let sum: {data_type}<{bitwidth}> = {zero}; for (let k: ubit<32> = 0..{channels}) {{ for (let dy: ubit<32> = 0..{kernel_size[1]}/*kernel_size[1]*/) {{ for (let dx: ubit<32> = 0..{kernel_size[0]}/*kernel_size[0]*/) {{ - let kernel_y: ubit<32> = /*strides[0]*/{strides[0]} * y + dy; - let kernel_x: ubit<32> = /*strides[1]*/{strides[1]} * x + dx; - weighted_sum += {data.name}[b][k][kernel_y][kernel_x] * {weight.name}[c][k][dy][dx]; - }} + let kernel_y: ubit<32> = (/*strides[0]*/{strides[0]} * y) + dy; + let kernel_x: ubit<32> = (/*strides[1]*/{strides[1]} * x) + dx; + }} combine {{ sum += {data.name}[b][k][kernel_y][kernel_x] * {weight.name}[c][k][dy][dx]; }} }} }} - {res.name}[b][c][y][x] := weighted_sum; + {res.name}[b][c][y][x] := sum; }} }} }} }} """ program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 34dc9120ee..1028e7cb47 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -60,9 +60,9 @@ def max_pool2d(): def conv2d(): - d = relay.var('data', 
shape=[1,2,2,2], dtype='int32') - w = relay.var('weight', shape=[1,2,2,2], dtype='int32') - return relay.Function([d, w], relay.nn.conv2d(d, w, padding=[1, 1, 1, 1], channels=1, kernel_size=[2,2])) + d = relay.var('data', shape=[5, 512, 14, 14], dtype='int32') + w = relay.var('weight', shape=[512, 512, 3, 3], dtype='int32') + return relay.Function([d, w], relay.nn.conv2d(d, w, padding=[1, 1, 1, 1], channels=512, kernel_size=[3, 3])) def mlp_net(): diff --git a/frontends/relay-futil/tests/conv2d.expect b/frontends/relay-futil/tests/conv2d.expect index a9fa6b4dfd..1d2163c61e 100644 --- a/frontends/relay-futil/tests/conv2d.expect +++ b/frontends/relay-futil/tests/conv2d.expect @@ -70,8 +70,8 @@ component conv2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, slice7 = prim std_slice(32, 2); slice8 = prim std_slice(32, 32); slice9 = prim std_slice(32, 3); + sum_0 = prim std_reg(32); weight_read0_0 = prim std_reg(32); - weighted_sum_0 = prim std_reg(32); x0 = prim std_reg(32); y0 = prim std_reg(32); } @@ -156,9 +156,9 @@ component conv2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, let3[done] = x0.done; } group let4<"static"=1> { - weighted_sum_0.in = fpconst0.out; - weighted_sum_0.write_en = 1'd1; - let4[done] = weighted_sum_0.done; + sum_0.in = fpconst0.out; + sum_0.write_en = 1'd1; + let4[done] = sum_0.done; } group let5<"static"=1> { k0.in = const8.out; @@ -224,11 +224,11 @@ component conv2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, upd10[done] = b0.done ? 1'd1; } group upd2<"static"=1> { - weighted_sum_0.write_en = 1'd1; - add2.left = weighted_sum_0.out; + sum_0.write_en = 1'd1; + add2.left = sum_0.out; add2.right = bin_read2_0.out; - weighted_sum_0.in = 1'd1 ? add2.out; - upd2[done] = weighted_sum_0.done ? 1'd1; + sum_0.in = 1'd1 ? add2.out; + upd2[done] = sum_0.done ? 
1'd1; } group upd3<"static"=1> { dx0.write_en = 1'd1; @@ -261,7 +261,7 @@ component conv2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, x0_0_0_0_addr0 = slice9.out; slice9.in = b0.out; x0_0_0_0_write_en = 1'd1; - x0_0_0_0_write_data = 1'd1 ? weighted_sum_0.out; + x0_0_0_0_write_data = 1'd1 ? sum_0.out; upd6[done] = x0_0_0_0_done ? 1'd1; } group upd7<"static"=1> { diff --git a/frontends/relay-futil/tests/data/conv2d.expect b/frontends/relay-futil/tests/data/conv2d.expect index e69de29bb2..2f8cb5e0be 100644 --- a/frontends/relay-futil/tests/data/conv2d.expect +++ b/frontends/relay-futil/tests/data/conv2d.expect @@ -0,0 +1,120 @@ +{ + "data": [ + [ + [ + [ + 1, + 1 + ], + [ + 4, + 1 + ] + ], + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ] + ], + [ + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ], + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ] + ] + ], + "weight": [ + [ + [ + [ + 2, + 1 + ], + [ + 1, + 1 + ] + ], + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ] + ], + [ + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ], + [ + [ + 1, + 3 + ], + [ + 1, + 4 + ] + ] + ] + ], + "x": [ + [ + [ + [ + 12 + ] + ], + [ + [ + 16 + ] + ] + ], + [ + [ + [ + 9 + ] + ], + [ + [ + 13 + ] + ] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/conv2d.relay b/frontends/relay-futil/tests/data/conv2d.relay index d85f4aae0e..168e53e418 100644 --- a/frontends/relay-futil/tests/data/conv2d.relay +++ b/frontends/relay-futil/tests/data/conv2d.relay @@ -1,5 +1,5 @@ v0.0.4 fn (%data: Tensor[(2, 2, 2, 2), int32], %weight: Tensor[(2, 2, 2, 2), int32]) { - let %x = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=2, kernel_size=[2, 2]); + let %x = nn.conv2d(%data, %weight, channels=2, kernel_size=[2,2]); %x } diff --git a/frontends/relay-futil/tests/data/conv2d.relay.data b/frontends/relay-futil/tests/data/conv2d.relay.data index c149ae31ea..81591e0997 100644 --- a/frontends/relay-futil/tests/data/conv2d.relay.data +++ b/frontends/relay-futil/tests/data/conv2d.relay.data @@ -1,15 +1,14 @@ { 
"data": { - "data": [ [[[1,0], [0,11]], [[10,4], [11,14]]], [[[10,100], [0,0]], [[0,10], [0,4]]] ], + "data": [ [[[1,1], [4,1]], [[1,1], [1,1]]], [[[1,1], [1,1]], [[1,1], [1,1]]] ], "bitwidth": 32 }, "weight": { - "data": [ [[[2,1], [3,4]], [[5,5], [0,3]]], [[[2,1], [4,4]], [[19,0], [20,0]]] ], + "data": [ [[[2,1], [1,1]], [[1,1], [1,1]]], [[[1,1], [1,1]], [[1,3], [1,4]]] ], "bitwidth": 32 }, "x": { - "data": [ [[[0,0,0], [0,0,0], [0,0,0]], [[0,0,0], [0,0,0], [0,0,0]]], - [[[0,0,0], [0,0,0], [0,0,0]], [[0,0,0], [0,0,0], [0,0,0]]] ], + "data": [ [[[0]], [[0]]], [[[0]], [[0]]] ], "bitwidth": 32 } } \ No newline at end of file From 13ec4ff31824f5a00f48143530881c24cf01a710 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sat, 5 Dec 2020 14:57:55 -0500 Subject: [PATCH 70/75] Remove mlp from test suite. --- frontends/relay-futil/tests/mlp_net.expect | 1997 -------------------- frontends/relay-futil/tests/mlp_net.relay | 16 - 2 files changed, 2013 deletions(-) delete mode 100644 frontends/relay-futil/tests/mlp_net.expect delete mode 100644 frontends/relay-futil/tests/mlp_net.relay diff --git a/frontends/relay-futil/tests/mlp_net.expect b/frontends/relay-futil/tests/mlp_net.expect deleted file mode 100644 index 9f7c781f88..0000000000 --- a/frontends/relay-futil/tests/mlp_net.expect +++ /dev/null @@ -1,1997 +0,0 @@ -import "primitives/std.lib"; - -component softmax(go: 1, clk: 1, x80_0_read_data: 32, x80_0_done: 1, x90_0_read_data: 32, x90_0_done: 1) -> (done: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1, x90_0_addr0: 1, x90_0_addr1: 4, x90_0_write_data: 32, x90_0_write_en: 1, x90_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(4); - add2 = prim std_add(4); - add3 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(4, 0); - const3 = prim std_const(4, 9); - const4 = prim std_const(4, 1); - const5 = prim 
std_const(4, 0); - const6 = prim std_const(4, 9); - const7 = prim std_const(4, 1); - const8 = prim std_const(1, 1); - div_pipe0 = prim std_div_pipe(32); - exp0 = prim std_exp(); - exp1 = prim std_exp(); - fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); - i0 = prim std_reg(1); - j0 = prim std_reg(4); - k0 = prim std_reg(4); - le0 = prim std_le(1); - le1 = prim std_le(4); - le2 = prim std_le(4); - slice0 = prim std_slice(32, 32); - x8_expsum_0 = prim std_reg(32); - x8_read0_0 = prim std_reg(32); - x8_read1_0 = prim std_reg(32); - x9_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = k0.out; - le2.right = const6.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - x8_expsum_0.in = fpconst0.out; - x8_expsum_0.write_en = 1'd1; - let1[done] = x8_expsum_0.done; - } - group let2<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let2[done] = j0.done; - } - group let3<"static"=1> { - k0.in = const5.out; - k0.write_en = 1'd1; - let3[done] = k0.done; - } - group let4<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let4[done] = bin_read0_0.done; - slice0.in = div_pipe0.out; - div_pipe0.left = x9_read0_0.out; - div_pipe0.right = x8_expsum_0.out; - div_pipe0.go = !div_pipe0.done ? 1'd1; - } - group upd0<"static"=1> { - x8_read0_0.write_en = 1'd1; - x80_0_addr1 = j0.out; - x80_0_addr0 = i0.out; - x8_read0_0.in = 1'd1 ? x80_0_read_data; - upd0[done] = x8_read0_0.done ? 1'd1; - } - group upd1 { - x8_expsum_0.write_en = 1'd1; - add0.left = x8_expsum_0.out; - add0.right = exp0.out; - exp0.exponent = x8_read0_0.out; - exp0.go = !exp0.done ? 1'd1; - x8_expsum_0.in = 1'd1 ? 
add0.out; - upd1[done] = x8_expsum_0.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - x8_read1_0.write_en = 1'd1; - x80_0_addr1 = k0.out; - x80_0_addr0 = i0.out; - x8_read1_0.in = 1'd1 ? x80_0_read_data; - upd3[done] = x8_read1_0.done ? 1'd1; - } - group upd4 { - x90_0_addr1 = k0.out; - x90_0_addr0 = i0.out; - x90_0_write_en = exp1.done; - exp1.exponent = x8_read1_0.out; - exp1.go = !exp1.done ? 1'd1; - x90_0_write_data = exp1.done ? exp1.out; - upd4[done] = x90_0_done ? 1'd1; - } - group upd5<"static"=1> { - x9_read0_0.write_en = 1'd1; - x90_0_addr1 = k0.out; - x90_0_addr0 = i0.out; - x9_read0_0.in = 1'd1 ? x90_0_read_data; - upd5[done] = x9_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - x90_0_addr1 = k0.out; - x90_0_addr0 = i0.out; - x90_0_write_en = 1'd1; - x90_0_write_data = 1'd1 ? bin_read0_0.out; - upd6[done] = x90_0_done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add2.left = k0.out; - add2.right = const7.out; - k0.in = 1'd1 ? add2.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - i0.write_en = 1'd1; - add3.left = i0.out; - add3.right = const8.out; - i0.in = 1'd1 ? add3.out; - upd8[done] = i0.done ? 
1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - par { - let1; - seq { - let2; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - } - } - let3; - while le2.out with cond2 { - seq { - upd3; - upd4; - upd5; - let4; - upd6; - upd7; - } - } - upd8; - } - } - } - } -} - -component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x70_0_read_data: 32, x70_0_done: 1, x80_0_read_data: 32, x80_0_done: 1) -> (done: 1, fc3_bias0_addr0: 4, fc3_bias0_write_data: 32, fc3_bias0_write_en: 1, fc3_bias0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(4); - add2 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(4, 0); - const3 = prim std_const(4, 9); - const4 = prim std_const(4, 1); - const5 = prim std_const(1, 1); - fc3_bias_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(4); - le0 = prim std_le(1); - le1 = prim std_le(4); - x7_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group upd0<"static"=1> { - x7_read0_0.write_en = 1'd1; - x70_0_addr1 = j0.out; - x70_0_addr0 = i0.out; - x7_read0_0.in = 1'd1 ? x70_0_read_data; - upd0[done] = x7_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - fc3_bias_read0_0.write_en = 1'd1; - fc3_bias0_addr0 = j0.out; - fc3_bias_read0_0.in = 1'd1 ? 
fc3_bias0_read_data; - upd1[done] = fc3_bias_read0_0.done ? 1'd1; - } - group upd2<"static"=1> { - x80_0_addr1 = j0.out; - x80_0_addr0 = i0.out; - x80_0_write_en = 1'd1; - add0.left = x7_read0_0.out; - add0.right = fc3_bias_read0_0.out; - x80_0_write_data = 1'd1 ? add0.out; - upd2[done] = x80_0_done ? 1'd1; - } - group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; - } - group upd4<"static"=1> { - i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - par { - upd0; - upd1; - } - upd2; - upd3; - } - } - upd4; - } - } - } - } -} - -component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: 1, x60_0_read_data: 32, x60_0_done: 1, x70_0_read_data: 32, x70_0_done: 1) -> (done: 1, fc3_weight0_0_addr0: 4, fc3_weight0_0_addr1: 7, fc3_weight0_0_write_data: 32, fc3_weight0_0_write_en: 1, fc3_weight0_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1) { - cells { - add0 = prim std_add(7); - add1 = prim std_add(4); - add2 = prim fixed_p_std_add(32, 16, 16); - add3 = prim std_add(7); - add4 = prim std_add(4); - add5 = prim std_add(1); - add6 = prim std_add(4); - add7 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(4, 0); - const1 = prim std_const(4, 9); - const10 = prim std_const(7, 0); - const11 = prim std_const(7, 63); - const12 = prim std_const(7, 1); - const13 = prim std_const(4, 1); - const14 = prim std_const(1, 1); - const15 = prim std_const(1, 0); - const16 = prim std_const(1, 0); - const17 = prim std_const(4, 0); - const18 = prim std_const(4, 9); - const19 = prim std_const(4, 1); - const2 = prim 
std_const(7, 0); - const20 = prim std_const(1, 1); - const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(4, 1); - const6 = prim std_const(1, 0); - const7 = prim std_const(1, 0); - const8 = prim std_const(4, 0); - const9 = prim std_const(4, 9); - fc3_weight_read0_0 = prim std_reg(32); - i0 = prim std_reg(4); - i1 = prim std_reg(1); - i2 = prim std_reg(1); - j0 = prim std_reg(7); - j1 = prim std_reg(4); - j2 = prim std_reg(4); - k0 = prim std_reg(7); - le0 = prim std_le(4); - le1 = prim std_le(7); - le2 = prim std_le(1); - le3 = prim std_le(4); - le4 = prim std_le(7); - le5 = prim std_le(1); - le6 = prim std_le(4); - mult_pipe0 = prim std_mult_pipe(32); - product_0 = prim std_reg(32); - slice0 = prim std_slice(32, 32); - slice1 = prim std_slice(32, 32); - temporary_x70_0 = prim std_mem_d2(32, 1, 10, 1, 4); - temporary_x7_read0_0 = prim std_reg(32); - transpose_fc3_weight0_0 = prim std_mem_d2(32, 64, 10, 7, 4); - transpose_fc3_weight_read0_0 = prim std_reg(32); - x6_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = i1.out; - le2.right = const7.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = j1.out; - le3.right = const9.out; - } - group cond4<"static"=0> { - cond4[done] = 1'd1; - le4.left = k0.out; - le4.right = const11.out; - } - group cond5<"static"=0> { - cond5[done] = 1'd1; - le5.left = i2.out; - le5.right = const16.out; - } - group cond6<"static"=0> { - cond6[done] = 1'd1; - le6.left = j2.out; - le6.right = const18.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group let2<"static"=1> { - 
i1.in = const6.out; - i1.write_en = 1'd1; - let2[done] = i1.done; - } - group let3<"static"=1> { - j1.in = const8.out; - j1.write_en = 1'd1; - let3[done] = j1.done; - } - group let4<"static"=1> { - k0.in = const10.out; - k0.write_en = 1'd1; - let4[done] = k0.done; - } - group let5<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let5[done] = bin_read0_0.done; - slice0.in = mult_pipe0.out; - mult_pipe0.left = x6_read0_0.out; - mult_pipe0.right = transpose_fc3_weight_read0_0.out; - mult_pipe0.go = !mult_pipe0.done ? 1'd1; - } - group let6<"static"=1> { - product_0.in = slice1.out; - product_0.write_en = 1'd1; - let6[done] = product_0.done; - slice1.in = bin_read0_0.out; - } - group let7<"static"=1> { - i2.in = const15.out; - i2.write_en = 1'd1; - let7[done] = i2.done; - } - group let8<"static"=1> { - j2.in = const17.out; - j2.write_en = 1'd1; - let8[done] = j2.done; - } - group upd0<"static"=1> { - fc3_weight_read0_0.write_en = 1'd1; - fc3_weight0_0_addr1 = j0.out; - fc3_weight0_0_addr0 = i0.out; - fc3_weight_read0_0.in = 1'd1 ? fc3_weight0_0_read_data; - upd0[done] = fc3_weight_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - transpose_fc3_weight0_0.addr1 = i0.out; - transpose_fc3_weight0_0.addr0 = j0.out; - transpose_fc3_weight0_0.write_en = 1'd1; - transpose_fc3_weight0_0.write_data = 1'd1 ? fc3_weight_read0_0.out; - upd1[done] = transpose_fc3_weight0_0.done ? 1'd1; - } - group upd10<"static"=1> { - temporary_x7_read0_0.write_en = 1'd1; - temporary_x70_0.addr1 = j2.out; - temporary_x70_0.addr0 = i2.out; - temporary_x7_read0_0.in = 1'd1 ? temporary_x70_0.read_data; - upd10[done] = temporary_x7_read0_0.done ? 1'd1; - } - group upd11<"static"=1> { - x70_0_addr1 = j2.out; - x70_0_addr0 = i2.out; - x70_0_write_en = 1'd1; - x70_0_write_data = 1'd1 ? temporary_x7_read0_0.out; - upd11[done] = x70_0_done ? 1'd1; - } - group upd12<"static"=1> { - j2.write_en = 1'd1; - add6.left = j2.out; - add6.right = const19.out; - j2.in = 1'd1 ? 
add6.out; - upd12[done] = j2.done ? 1'd1; - } - group upd13<"static"=1> { - i2.write_en = 1'd1; - add7.left = i2.out; - add7.right = const20.out; - i2.in = 1'd1 ? add7.out; - upd13[done] = i2.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd3[done] = i0.done ? 1'd1; - } - group upd4<"static"=1> { - x6_read0_0.write_en = 1'd1; - x60_0_addr1 = k0.out; - x60_0_addr0 = i1.out; - x6_read0_0.in = 1'd1 ? x60_0_read_data; - upd4[done] = x6_read0_0.done ? 1'd1; - } - group upd5<"static"=1> { - transpose_fc3_weight_read0_0.write_en = 1'd1; - transpose_fc3_weight0_0.addr1 = j1.out; - transpose_fc3_weight0_0.addr0 = k0.out; - transpose_fc3_weight_read0_0.in = 1'd1 ? transpose_fc3_weight0_0.read_data; - upd5[done] = transpose_fc3_weight_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - temporary_x70_0.addr1 = j1.out; - temporary_x70_0.addr0 = i1.out; - temporary_x70_0.write_en = 1'd1; - add2.left = temporary_x70_0.read_data; - add2.right = product_0.out; - temporary_x70_0.addr1 = j1.out; - temporary_x70_0.addr0 = i1.out; - temporary_x70_0.write_data = 1'd1 ? add2.out; - upd6[done] = temporary_x70_0.done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add3.left = k0.out; - add3.right = const12.out; - k0.in = 1'd1 ? add3.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - j1.write_en = 1'd1; - add4.left = j1.out; - add4.right = const13.out; - j1.in = 1'd1 ? add4.out; - upd8[done] = j1.done ? 1'd1; - } - group upd9<"static"=1> { - i1.write_en = 1'd1; - add5.left = i1.out; - add5.right = const14.out; - i1.in = 1'd1 ? add5.out; - upd9[done] = i1.done ? 
1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - upd3; - } - } - let2; - while le2.out with cond2 { - seq { - let3; - while le3.out with cond3 { - seq { - let4; - while le4.out with cond4 { - seq { - par { - upd4; - upd5; - } - let5; - let6; - upd6; - upd7; - } - } - upd8; - } - } - upd9; - } - } - let7; - while le5.out with cond5 { - seq { - let8; - while le6.out with cond6 { - seq { - upd10; - upd11; - upd12; - } - } - upd13; - } - } - } - } -} - -component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_data: 32, x60_0_done: 1) -> (done: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1) { - cells { - add0 = prim std_add(7); - add1 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(7, 0); - const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(1, 1); - fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); - gt0 = prim fixed_p_std_gt(32, 16, 16); - i0 = prim std_reg(1); - j0 = prim std_reg(7); - le0 = prim std_le(1); - le1 = prim std_le(7); - x5_read0_0 = prim std_reg(32); - x5_read1_0 = prim std_reg(32); - zero_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - gt0.left = x5_read0_0.out; - gt0.right = zero_0.out; - } - group let0<"static"=1> { - zero_0.in = fpconst0.out; - zero_0.write_en = 1'd1; - let0[done] = zero_0.done; - } - group let1<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let1[done] = i0.done; - } - group let2<"static"=1> { - j0.in = const2.out; - 
j0.write_en = 1'd1; - let2[done] = j0.done; - } - group upd0<"static"=1> { - x5_read0_0.write_en = 1'd1; - x50_0_addr1 = j0.out; - x50_0_addr0 = i0.out; - x5_read0_0.in = 1'd1 ? x50_0_read_data; - upd0[done] = x5_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - x5_read1_0.write_en = 1'd1; - x50_0_addr1 = j0.out; - x50_0_addr0 = i0.out; - x5_read1_0.in = 1'd1 ? x50_0_read_data; - upd1[done] = x5_read1_0.done ? 1'd1; - } - group upd2<"static"=1> { - x60_0_addr1 = j0.out; - x60_0_addr0 = i0.out; - x60_0_write_en = 1'd1; - x60_0_write_data = 1'd1 ? x5_read1_0.out; - upd2[done] = x60_0_done ? 1'd1; - } - group upd3<"static"=1> { - x60_0_addr1 = j0.out; - x60_0_addr0 = i0.out; - x60_0_write_en = 1'd1; - x60_0_write_data = 1'd1 ? zero_0.out; - upd3[done] = x60_0_done ? 1'd1; - } - group upd4<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd4[done] = j0.done ? 1'd1; - } - group upd5<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd5[done] = i0.done ? 
1'd1; - } - } - - control { - seq { - let0; - let1; - while le0.out with cond0 { - seq { - let2; - while le1.out with cond1 { - seq { - upd0; - if gt0.out with cond2 { - seq { - upd1; - upd2; - } - } else { - upd3; - } - upd4; - } - } - upd5; - } - } - } - } -} - -component bias_add1(go: 1, clk: 1, fc2_bias0_read_data: 32, fc2_bias0_done: 1, x40_0_read_data: 32, x40_0_done: 1, x50_0_read_data: 32, x50_0_done: 1) -> (done: 1, fc2_bias0_addr0: 7, fc2_bias0_write_data: 32, fc2_bias0_write_en: 1, fc2_bias0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(7); - add2 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(7, 0); - const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(1, 1); - fc2_bias_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(7); - le0 = prim std_le(1); - le1 = prim std_le(7); - x4_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group upd0<"static"=1> { - x4_read0_0.write_en = 1'd1; - x40_0_addr1 = j0.out; - x40_0_addr0 = i0.out; - x4_read0_0.in = 1'd1 ? x40_0_read_data; - upd0[done] = x4_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - fc2_bias_read0_0.write_en = 1'd1; - fc2_bias0_addr0 = j0.out; - fc2_bias_read0_0.in = 1'd1 ? fc2_bias0_read_data; - upd1[done] = fc2_bias_read0_0.done ? 
1'd1; - } - group upd2<"static"=1> { - x50_0_addr1 = j0.out; - x50_0_addr0 = i0.out; - x50_0_write_en = 1'd1; - add0.left = x4_read0_0.out; - add0.right = fc2_bias_read0_0.out; - x50_0_write_data = 1'd1 ? add0.out; - upd2[done] = x50_0_done ? 1'd1; - } - group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; - } - group upd4<"static"=1> { - i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - par { - upd0; - upd1; - } - upd2; - upd3; - } - } - upd4; - } - } - } - } -} - -component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: 1, x30_0_read_data: 32, x30_0_done: 1, x40_0_read_data: 32, x40_0_done: 1) -> (done: 1, fc2_weight0_0_addr0: 7, fc2_weight0_0_addr1: 8, fc2_weight0_0_write_data: 32, fc2_weight0_0_write_en: 1, fc2_weight0_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1) { - cells { - add0 = prim std_add(8); - add1 = prim std_add(7); - add2 = prim fixed_p_std_add(32, 16, 16); - add3 = prim std_add(8); - add4 = prim std_add(7); - add5 = prim std_add(1); - add6 = prim std_add(7); - add7 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(7, 0); - const1 = prim std_const(7, 63); - const10 = prim std_const(8, 0); - const11 = prim std_const(8, 127); - const12 = prim std_const(8, 1); - const13 = prim std_const(7, 1); - const14 = prim std_const(1, 1); - const15 = prim std_const(1, 0); - const16 = prim std_const(1, 0); - const17 = prim std_const(7, 0); - const18 = prim std_const(7, 63); - const19 = prim std_const(7, 1); - const2 = prim std_const(8, 0); - const20 = prim std_const(1, 1); - const3 = 
prim std_const(8, 127); - const4 = prim std_const(8, 1); - const5 = prim std_const(7, 1); - const6 = prim std_const(1, 0); - const7 = prim std_const(1, 0); - const8 = prim std_const(7, 0); - const9 = prim std_const(7, 63); - fc2_weight_read0_0 = prim std_reg(32); - i0 = prim std_reg(7); - i1 = prim std_reg(1); - i2 = prim std_reg(1); - j0 = prim std_reg(8); - j1 = prim std_reg(7); - j2 = prim std_reg(7); - k0 = prim std_reg(8); - le0 = prim std_le(7); - le1 = prim std_le(8); - le2 = prim std_le(1); - le3 = prim std_le(7); - le4 = prim std_le(8); - le5 = prim std_le(1); - le6 = prim std_le(7); - mult_pipe0 = prim std_mult_pipe(32); - product_0 = prim std_reg(32); - slice0 = prim std_slice(32, 32); - slice1 = prim std_slice(32, 32); - temporary_x40_0 = prim std_mem_d2(32, 1, 64, 1, 7); - temporary_x4_read0_0 = prim std_reg(32); - transpose_fc2_weight0_0 = prim std_mem_d2(32, 128, 64, 8, 7); - transpose_fc2_weight_read0_0 = prim std_reg(32); - x3_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = i1.out; - le2.right = const7.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = j1.out; - le3.right = const9.out; - } - group cond4<"static"=0> { - cond4[done] = 1'd1; - le4.left = k0.out; - le4.right = const11.out; - } - group cond5<"static"=0> { - cond5[done] = 1'd1; - le5.left = i2.out; - le5.right = const16.out; - } - group cond6<"static"=0> { - cond6[done] = 1'd1; - le6.left = j2.out; - le6.right = const18.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group let2<"static"=1> { - i1.in = const6.out; - i1.write_en = 1'd1; - let2[done] = 
i1.done; - } - group let3<"static"=1> { - j1.in = const8.out; - j1.write_en = 1'd1; - let3[done] = j1.done; - } - group let4<"static"=1> { - k0.in = const10.out; - k0.write_en = 1'd1; - let4[done] = k0.done; - } - group let5<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let5[done] = bin_read0_0.done; - slice0.in = mult_pipe0.out; - mult_pipe0.left = x3_read0_0.out; - mult_pipe0.right = transpose_fc2_weight_read0_0.out; - mult_pipe0.go = !mult_pipe0.done ? 1'd1; - } - group let6<"static"=1> { - product_0.in = slice1.out; - product_0.write_en = 1'd1; - let6[done] = product_0.done; - slice1.in = bin_read0_0.out; - } - group let7<"static"=1> { - i2.in = const15.out; - i2.write_en = 1'd1; - let7[done] = i2.done; - } - group let8<"static"=1> { - j2.in = const17.out; - j2.write_en = 1'd1; - let8[done] = j2.done; - } - group upd0<"static"=1> { - fc2_weight_read0_0.write_en = 1'd1; - fc2_weight0_0_addr1 = j0.out; - fc2_weight0_0_addr0 = i0.out; - fc2_weight_read0_0.in = 1'd1 ? fc2_weight0_0_read_data; - upd0[done] = fc2_weight_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - transpose_fc2_weight0_0.addr1 = i0.out; - transpose_fc2_weight0_0.addr0 = j0.out; - transpose_fc2_weight0_0.write_en = 1'd1; - transpose_fc2_weight0_0.write_data = 1'd1 ? fc2_weight_read0_0.out; - upd1[done] = transpose_fc2_weight0_0.done ? 1'd1; - } - group upd10<"static"=1> { - temporary_x4_read0_0.write_en = 1'd1; - temporary_x40_0.addr1 = j2.out; - temporary_x40_0.addr0 = i2.out; - temporary_x4_read0_0.in = 1'd1 ? temporary_x40_0.read_data; - upd10[done] = temporary_x4_read0_0.done ? 1'd1; - } - group upd11<"static"=1> { - x40_0_addr1 = j2.out; - x40_0_addr0 = i2.out; - x40_0_write_en = 1'd1; - x40_0_write_data = 1'd1 ? temporary_x4_read0_0.out; - upd11[done] = x40_0_done ? 1'd1; - } - group upd12<"static"=1> { - j2.write_en = 1'd1; - add6.left = j2.out; - add6.right = const19.out; - j2.in = 1'd1 ? add6.out; - upd12[done] = j2.done ? 
1'd1; - } - group upd13<"static"=1> { - i2.write_en = 1'd1; - add7.left = i2.out; - add7.right = const20.out; - i2.in = 1'd1 ? add7.out; - upd13[done] = i2.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd3[done] = i0.done ? 1'd1; - } - group upd4<"static"=1> { - x3_read0_0.write_en = 1'd1; - x30_0_addr1 = k0.out; - x30_0_addr0 = i1.out; - x3_read0_0.in = 1'd1 ? x30_0_read_data; - upd4[done] = x3_read0_0.done ? 1'd1; - } - group upd5<"static"=1> { - transpose_fc2_weight_read0_0.write_en = 1'd1; - transpose_fc2_weight0_0.addr1 = j1.out; - transpose_fc2_weight0_0.addr0 = k0.out; - transpose_fc2_weight_read0_0.in = 1'd1 ? transpose_fc2_weight0_0.read_data; - upd5[done] = transpose_fc2_weight_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - temporary_x40_0.addr1 = j1.out; - temporary_x40_0.addr0 = i1.out; - temporary_x40_0.write_en = 1'd1; - add2.left = temporary_x40_0.read_data; - add2.right = product_0.out; - temporary_x40_0.addr1 = j1.out; - temporary_x40_0.addr0 = i1.out; - temporary_x40_0.write_data = 1'd1 ? add2.out; - upd6[done] = temporary_x40_0.done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add3.left = k0.out; - add3.right = const12.out; - k0.in = 1'd1 ? add3.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - j1.write_en = 1'd1; - add4.left = j1.out; - add4.right = const13.out; - j1.in = 1'd1 ? add4.out; - upd8[done] = j1.done ? 1'd1; - } - group upd9<"static"=1> { - i1.write_en = 1'd1; - add5.left = i1.out; - add5.right = const14.out; - i1.in = 1'd1 ? add5.out; - upd9[done] = i1.done ? 
1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - upd3; - } - } - let2; - while le2.out with cond2 { - seq { - let3; - while le3.out with cond3 { - seq { - let4; - while le4.out with cond4 { - seq { - par { - upd4; - upd5; - } - let5; - let6; - upd6; - upd7; - } - } - upd8; - } - } - upd9; - } - } - let7; - while le5.out with cond5 { - seq { - let8; - while le6.out with cond6 { - seq { - upd10; - upd11; - upd12; - } - } - upd13; - } - } - } - } -} - -component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_data: 32, x30_0_done: 1) -> (done: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1) { - cells { - add0 = prim std_add(8); - add1 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(8, 0); - const3 = prim std_const(8, 127); - const4 = prim std_const(8, 1); - const5 = prim std_const(1, 1); - fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); - gt0 = prim fixed_p_std_gt(32, 16, 16); - i0 = prim std_reg(1); - j0 = prim std_reg(8); - le0 = prim std_le(1); - le1 = prim std_le(8); - x2_read0_0 = prim std_reg(32); - x2_read1_0 = prim std_reg(32); - zero_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - gt0.left = x2_read0_0.out; - gt0.right = zero_0.out; - } - group let0<"static"=1> { - zero_0.in = fpconst0.out; - zero_0.write_en = 1'd1; - let0[done] = zero_0.done; - } - group let1<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let1[done] = i0.done; - } - group let2<"static"=1> { - j0.in = const2.out; - 
j0.write_en = 1'd1; - let2[done] = j0.done; - } - group upd0<"static"=1> { - x2_read0_0.write_en = 1'd1; - x20_0_addr1 = j0.out; - x20_0_addr0 = i0.out; - x2_read0_0.in = 1'd1 ? x20_0_read_data; - upd0[done] = x2_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - x2_read1_0.write_en = 1'd1; - x20_0_addr1 = j0.out; - x20_0_addr0 = i0.out; - x2_read1_0.in = 1'd1 ? x20_0_read_data; - upd1[done] = x2_read1_0.done ? 1'd1; - } - group upd2<"static"=1> { - x30_0_addr1 = j0.out; - x30_0_addr0 = i0.out; - x30_0_write_en = 1'd1; - x30_0_write_data = 1'd1 ? x2_read1_0.out; - upd2[done] = x30_0_done ? 1'd1; - } - group upd3<"static"=1> { - x30_0_addr1 = j0.out; - x30_0_addr0 = i0.out; - x30_0_write_en = 1'd1; - x30_0_write_data = 1'd1 ? zero_0.out; - upd3[done] = x30_0_done ? 1'd1; - } - group upd4<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd4[done] = j0.done ? 1'd1; - } - group upd5<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd5[done] = i0.done ? 
1'd1; - } - } - - control { - seq { - let0; - let1; - while le0.out with cond0 { - seq { - let2; - while le1.out with cond1 { - seq { - upd0; - if gt0.out with cond2 { - seq { - upd1; - upd2; - } - } else { - upd3; - } - upd4; - } - } - upd5; - } - } - } - } -} - -component bias_add(go: 1, clk: 1, fc1_bias0_read_data: 32, fc1_bias0_done: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_read_data: 32, x20_0_done: 1) -> (done: 1, fc1_bias0_addr0: 8, fc1_bias0_write_data: 32, fc1_bias0_write_en: 1, fc1_bias0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(8); - add2 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(8, 0); - const3 = prim std_const(8, 127); - const4 = prim std_const(8, 1); - const5 = prim std_const(1, 1); - fc1_bias_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(8); - le0 = prim std_le(1); - le1 = prim std_le(8); - x1_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group upd0<"static"=1> { - x1_read0_0.write_en = 1'd1; - x10_0_addr1 = j0.out; - x10_0_addr0 = i0.out; - x1_read0_0.in = 1'd1 ? x10_0_read_data; - upd0[done] = x1_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - fc1_bias_read0_0.write_en = 1'd1; - fc1_bias0_addr0 = j0.out; - fc1_bias_read0_0.in = 1'd1 ? fc1_bias0_read_data; - upd1[done] = fc1_bias_read0_0.done ? 
1'd1; - } - group upd2<"static"=1> { - x20_0_addr1 = j0.out; - x20_0_addr0 = i0.out; - x20_0_write_en = 1'd1; - add0.left = x1_read0_0.out; - add0.right = fc1_bias_read0_0.out; - x20_0_write_data = 1'd1 ? add0.out; - upd2[done] = x20_0_done ? 1'd1; - } - group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; - } - group upd4<"static"=1> { - i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - par { - upd0; - upd1; - } - upd2; - upd3; - } - } - upd4; - } - } - } - } -} - -component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, fc1_weight0_0_addr0: 8, fc1_weight0_0_addr1: 10, fc1_weight0_0_write_data: 32, fc1_weight0_0_write_en: 1, fc1_weight0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { - cells { - add0 = prim std_add(10); - add1 = prim std_add(8); - add2 = prim fixed_p_std_add(32, 16, 16); - add3 = prim std_add(10); - add4 = prim std_add(8); - add5 = prim std_add(1); - add6 = prim std_add(8); - add7 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(8, 0); - const1 = prim std_const(8, 127); - const10 = prim std_const(10, 0); - const11 = prim std_const(10, 783); - const12 = prim std_const(10, 1); - const13 = prim std_const(8, 1); - const14 = prim std_const(1, 1); - const15 = prim std_const(1, 0); - const16 = prim std_const(1, 0); - const17 = prim std_const(8, 0); - const18 = prim std_const(8, 127); - const19 = prim std_const(8, 1); - const2 = prim std_const(10, 0); - const20 = prim std_const(1, 1); - const3 
= prim std_const(10, 783); - const4 = prim std_const(10, 1); - const5 = prim std_const(8, 1); - const6 = prim std_const(1, 0); - const7 = prim std_const(1, 0); - const8 = prim std_const(8, 0); - const9 = prim std_const(8, 127); - fc1_weight_read0_0 = prim std_reg(32); - i0 = prim std_reg(8); - i1 = prim std_reg(1); - i2 = prim std_reg(1); - j0 = prim std_reg(10); - j1 = prim std_reg(8); - j2 = prim std_reg(8); - k0 = prim std_reg(10); - le0 = prim std_le(8); - le1 = prim std_le(10); - le2 = prim std_le(1); - le3 = prim std_le(8); - le4 = prim std_le(10); - le5 = prim std_le(1); - le6 = prim std_le(8); - mult_pipe0 = prim std_mult_pipe(32); - product_0 = prim std_reg(32); - slice0 = prim std_slice(32, 32); - slice1 = prim std_slice(32, 32); - temporary_x10_0 = prim std_mem_d2(32, 1, 128, 1, 8); - temporary_x1_read0_0 = prim std_reg(32); - transpose_fc1_weight0_0 = prim std_mem_d2(32, 784, 128, 10, 8); - transpose_fc1_weight_read0_0 = prim std_reg(32); - x_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = i1.out; - le2.right = const7.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = j1.out; - le3.right = const9.out; - } - group cond4<"static"=0> { - cond4[done] = 1'd1; - le4.left = k0.out; - le4.right = const11.out; - } - group cond5<"static"=0> { - cond5[done] = 1'd1; - le5.left = i2.out; - le5.right = const16.out; - } - group cond6<"static"=0> { - cond6[done] = 1'd1; - le6.left = j2.out; - le6.right = const18.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group let2<"static"=1> { - i1.in = const6.out; - i1.write_en = 1'd1; - 
let2[done] = i1.done; - } - group let3<"static"=1> { - j1.in = const8.out; - j1.write_en = 1'd1; - let3[done] = j1.done; - } - group let4<"static"=1> { - k0.in = const10.out; - k0.write_en = 1'd1; - let4[done] = k0.done; - } - group let5<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let5[done] = bin_read0_0.done; - slice0.in = mult_pipe0.out; - mult_pipe0.left = x_read0_0.out; - mult_pipe0.right = transpose_fc1_weight_read0_0.out; - mult_pipe0.go = !mult_pipe0.done ? 1'd1; - } - group let6<"static"=1> { - product_0.in = slice1.out; - product_0.write_en = 1'd1; - let6[done] = product_0.done; - slice1.in = bin_read0_0.out; - } - group let7<"static"=1> { - i2.in = const15.out; - i2.write_en = 1'd1; - let7[done] = i2.done; - } - group let8<"static"=1> { - j2.in = const17.out; - j2.write_en = 1'd1; - let8[done] = j2.done; - } - group upd0<"static"=1> { - fc1_weight_read0_0.write_en = 1'd1; - fc1_weight0_0_addr1 = j0.out; - fc1_weight0_0_addr0 = i0.out; - fc1_weight_read0_0.in = 1'd1 ? fc1_weight0_0_read_data; - upd0[done] = fc1_weight_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - transpose_fc1_weight0_0.addr1 = i0.out; - transpose_fc1_weight0_0.addr0 = j0.out; - transpose_fc1_weight0_0.write_en = 1'd1; - transpose_fc1_weight0_0.write_data = 1'd1 ? fc1_weight_read0_0.out; - upd1[done] = transpose_fc1_weight0_0.done ? 1'd1; - } - group upd10<"static"=1> { - temporary_x1_read0_0.write_en = 1'd1; - temporary_x10_0.addr1 = j2.out; - temporary_x10_0.addr0 = i2.out; - temporary_x1_read0_0.in = 1'd1 ? temporary_x10_0.read_data; - upd10[done] = temporary_x1_read0_0.done ? 1'd1; - } - group upd11<"static"=1> { - x10_0_addr1 = j2.out; - x10_0_addr0 = i2.out; - x10_0_write_en = 1'd1; - x10_0_write_data = 1'd1 ? temporary_x1_read0_0.out; - upd11[done] = x10_0_done ? 1'd1; - } - group upd12<"static"=1> { - j2.write_en = 1'd1; - add6.left = j2.out; - add6.right = const19.out; - j2.in = 1'd1 ? add6.out; - upd12[done] = j2.done ? 
1'd1; - } - group upd13<"static"=1> { - i2.write_en = 1'd1; - add7.left = i2.out; - add7.right = const20.out; - i2.in = 1'd1 ? add7.out; - upd13[done] = i2.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd3[done] = i0.done ? 1'd1; - } - group upd4<"static"=1> { - x_read0_0.write_en = 1'd1; - x0_0_addr1 = k0.out; - x0_0_addr0 = i1.out; - x_read0_0.in = 1'd1 ? x0_0_read_data; - upd4[done] = x_read0_0.done ? 1'd1; - } - group upd5<"static"=1> { - transpose_fc1_weight_read0_0.write_en = 1'd1; - transpose_fc1_weight0_0.addr1 = j1.out; - transpose_fc1_weight0_0.addr0 = k0.out; - transpose_fc1_weight_read0_0.in = 1'd1 ? transpose_fc1_weight0_0.read_data; - upd5[done] = transpose_fc1_weight_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - temporary_x10_0.addr1 = j1.out; - temporary_x10_0.addr0 = i1.out; - temporary_x10_0.write_en = 1'd1; - add2.left = temporary_x10_0.read_data; - add2.right = product_0.out; - temporary_x10_0.addr1 = j1.out; - temporary_x10_0.addr0 = i1.out; - temporary_x10_0.write_data = 1'd1 ? add2.out; - upd6[done] = temporary_x10_0.done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add3.left = k0.out; - add3.right = const12.out; - k0.in = 1'd1 ? add3.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - j1.write_en = 1'd1; - add4.left = j1.out; - add4.right = const13.out; - j1.in = 1'd1 ? add4.out; - upd8[done] = j1.done ? 1'd1; - } - group upd9<"static"=1> { - i1.write_en = 1'd1; - add5.left = i1.out; - add5.right = const14.out; - i1.in = 1'd1 ? add5.out; - upd9[done] = i1.done ? 
1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - upd3; - } - } - let2; - while le2.out with cond2 { - seq { - let3; - while le3.out with cond3 { - seq { - let4; - while le4.out with cond4 { - seq { - par { - upd4; - upd5; - } - let5; - let6; - upd6; - upd7; - } - } - upd8; - } - } - upd9; - } - } - let7; - while le5.out with cond5 { - seq { - let8; - while le6.out with cond6 { - seq { - upd10; - upd11; - upd12; - } - } - upd13; - } - } - } - } -} - -component batch_flatten(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 1, data0_0_0_0_addr1: 1, data0_0_0_0_addr2: 5, data0_0_0_0_addr3: 5, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1) { - cells { - add0 = prim std_add(10); - add1 = prim std_add(5); - add2 = prim std_add(5); - add3 = prim std_add(1); - add4 = prim std_add(1); - const0 = prim std_const(10, 0); - const1 = prim std_const(1, 0); - const10 = prim std_const(5, 1); - const11 = prim std_const(5, 1); - const12 = prim std_const(1, 1); - const13 = prim std_const(1, 1); - const2 = prim std_const(1, 0); - const3 = prim std_const(1, 0); - const4 = prim std_const(1, 0); - const5 = prim std_const(5, 0); - const6 = prim std_const(5, 27); - const7 = prim std_const(5, 0); - const8 = prim std_const(5, 27); - const9 = prim std_const(10, 1); - data_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(1); - k0 = prim std_reg(5); - l0 = prim std_reg(5); - le0 = prim std_le(1); - le1 = prim std_le(1); - le2 = prim std_le(5); - le3 = prim std_le(5); - m_0 = prim std_reg(10); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const2.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - 
le1.right = const4.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = k0.out; - le2.right = const6.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = l0.out; - le3.right = const8.out; - } - group let0<"static"=1> { - m_0.in = const0.out; - m_0.write_en = 1'd1; - let0[done] = m_0.done; - } - group let1<"static"=1> { - i0.in = const1.out; - i0.write_en = 1'd1; - let1[done] = i0.done; - } - group let2<"static"=1> { - j0.in = const3.out; - j0.write_en = 1'd1; - let2[done] = j0.done; - } - group let3<"static"=1> { - k0.in = const5.out; - k0.write_en = 1'd1; - let3[done] = k0.done; - } - group let4<"static"=1> { - l0.in = const7.out; - l0.write_en = 1'd1; - let4[done] = l0.done; - } - group upd0<"static"=1> { - data_read0_0.write_en = 1'd1; - data0_0_0_0_addr3 = l0.out; - data0_0_0_0_addr2 = k0.out; - data0_0_0_0_addr1 = j0.out; - data0_0_0_0_addr0 = i0.out; - data_read0_0.in = 1'd1 ? data0_0_0_0_read_data; - upd0[done] = data_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - x0_0_addr1 = m_0.out; - x0_0_addr0 = i0.out; - x0_0_write_en = 1'd1; - x0_0_write_data = 1'd1 ? data_read0_0.out; - upd1[done] = x0_0_done ? 1'd1; - } - group upd2<"static"=1> { - m_0.write_en = 1'd1; - add0.left = m_0.out; - add0.right = const9.out; - m_0.in = 1'd1 ? add0.out; - upd2[done] = m_0.done ? 1'd1; - } - group upd3<"static"=1> { - l0.write_en = 1'd1; - add1.left = l0.out; - add1.right = const10.out; - l0.in = 1'd1 ? add1.out; - upd3[done] = l0.done ? 1'd1; - } - group upd4<"static"=1> { - k0.write_en = 1'd1; - add2.left = k0.out; - add2.right = const11.out; - k0.in = 1'd1 ? add2.out; - upd4[done] = k0.done ? 1'd1; - } - group upd5<"static"=1> { - j0.write_en = 1'd1; - add3.left = j0.out; - add3.right = const12.out; - j0.in = 1'd1 ? add3.out; - upd5[done] = j0.done ? 1'd1; - } - group upd6<"static"=1> { - i0.write_en = 1'd1; - add4.left = i0.out; - add4.right = const13.out; - i0.in = 1'd1 ? add4.out; - upd6[done] = i0.done ? 
1'd1; - } - } - - control { - seq { - let0; - let1; - while le0.out with cond0 { - seq { - let2; - while le1.out with cond1 { - seq { - let3; - while le2.out with cond2 { - seq { - let4; - while le3.out with cond3 { - seq { - upd0; - upd1; - upd2; - upd3; - } - } - upd4; - } - } - upd5; - } - } - upd6; - } - } - } - } -} - -component main () -> () { - cells { - x9 = prim std_mem_d2(32, 1, 10, 1, 4); - x8 = prim std_mem_d2(32, 1, 10, 1, 4); - softmax0 = softmax; - x7 = prim std_mem_d2(32, 1, 10, 1, 4); - fc3_bias = prim std_mem_d1(32, 10, 4); - bias_add2 = bias_add2; - x6 = prim std_mem_d2(32, 1, 64, 1, 7); - fc3_weight = prim std_mem_d2(32, 10, 64, 4, 7); - dense2 = dense2; - x5 = prim std_mem_d2(32, 1, 64, 1, 7); - relu1 = relu1; - x4 = prim std_mem_d2(32, 1, 64, 1, 7); - fc2_bias = prim std_mem_d1(32, 64, 7); - bias_add1 = bias_add1; - x3 = prim std_mem_d2(32, 1, 128, 1, 8); - fc2_weight = prim std_mem_d2(32, 64, 128, 7, 8); - dense1 = dense1; - x2 = prim std_mem_d2(32, 1, 128, 1, 8); - relu0 = relu; - x1 = prim std_mem_d2(32, 1, 128, 1, 8); - fc1_bias = prim std_mem_d1(32, 128, 8); - bias_add0 = bias_add; - x = prim std_mem_d2(32, 1, 784, 1, 10); - fc1_weight = prim std_mem_d2(32, 128, 784, 8, 10); - dense0 = dense; - data = prim std_mem_d4(32, 1, 1, 28, 28, 1, 1, 5, 5); - batch_flatten0 = batch_flatten; - } - wires { - group run_batch_flatten { - data.addr0 = batch_flatten0.data0_0_0_0_addr0; - batch_flatten0.data0_0_0_0_read_data = data.read_data; - data.addr1 = batch_flatten0.data0_0_0_0_addr1; - data.addr2 = batch_flatten0.data0_0_0_0_addr2; - data.addr3 = batch_flatten0.data0_0_0_0_addr3; - x.addr0 = batch_flatten0.x0_0_addr0; - x.addr1 = batch_flatten0.x0_0_addr1; - x.write_data = batch_flatten0.x0_0_write_data; - x.write_en = batch_flatten0.x0_0_write_en; - batch_flatten0.x0_0_done = x.done; - batch_flatten0.go = 1'd1; - run_batch_flatten[done] = batch_flatten0.done ? 
1'd1; - } - group run_dense { - x.addr0 = dense0.x0_0_addr0; - dense0.x0_0_read_data = x.read_data; - x.addr1 = dense0.x0_0_addr1; - fc1_weight.addr0 = dense0.fc1_weight0_0_addr0; - dense0.fc1_weight0_0_read_data = fc1_weight.read_data; - fc1_weight.addr1 = dense0.fc1_weight0_0_addr1; - x1.addr0 = dense0.x10_0_addr0; - x1.addr1 = dense0.x10_0_addr1; - x1.write_data = dense0.x10_0_write_data; - x1.write_en = dense0.x10_0_write_en; - dense0.x10_0_done = x1.done; - dense0.go = 1'd1; - run_dense[done] = dense0.done ? 1'd1; - } - group run_bias_add { - x1.addr0 = bias_add0.x10_0_addr0; - bias_add0.x10_0_read_data = x1.read_data; - x1.addr1 = bias_add0.x10_0_addr1; - fc1_bias.addr0 = bias_add0.fc1_bias0_addr0; - bias_add0.fc1_bias0_read_data = fc1_bias.read_data; - x2.addr0 = bias_add0.x20_0_addr0; - x2.addr1 = bias_add0.x20_0_addr1; - x2.write_data = bias_add0.x20_0_write_data; - x2.write_en = bias_add0.x20_0_write_en; - bias_add0.x20_0_done = x2.done; - bias_add0.go = 1'd1; - run_bias_add[done] = bias_add0.done ? 1'd1; - } - group run_relu { - x2.addr0 = relu0.x20_0_addr0; - relu0.x20_0_read_data = x2.read_data; - x2.addr1 = relu0.x20_0_addr1; - x3.addr0 = relu0.x30_0_addr0; - x3.addr1 = relu0.x30_0_addr1; - x3.write_data = relu0.x30_0_write_data; - x3.write_en = relu0.x30_0_write_en; - relu0.x30_0_done = x3.done; - relu0.go = 1'd1; - run_relu[done] = relu0.done ? 1'd1; - } - group run_dense1 { - x3.addr0 = dense1.x30_0_addr0; - dense1.x30_0_read_data = x3.read_data; - x3.addr1 = dense1.x30_0_addr1; - fc2_weight.addr0 = dense1.fc2_weight0_0_addr0; - dense1.fc2_weight0_0_read_data = fc2_weight.read_data; - fc2_weight.addr1 = dense1.fc2_weight0_0_addr1; - x4.addr0 = dense1.x40_0_addr0; - x4.addr1 = dense1.x40_0_addr1; - x4.write_data = dense1.x40_0_write_data; - x4.write_en = dense1.x40_0_write_en; - dense1.x40_0_done = x4.done; - dense1.go = 1'd1; - run_dense1[done] = dense1.done ? 
1'd1; - } - group run_bias_add1 { - x4.addr0 = bias_add1.x40_0_addr0; - bias_add1.x40_0_read_data = x4.read_data; - x4.addr1 = bias_add1.x40_0_addr1; - fc2_bias.addr0 = bias_add1.fc2_bias0_addr0; - bias_add1.fc2_bias0_read_data = fc2_bias.read_data; - x5.addr0 = bias_add1.x50_0_addr0; - x5.addr1 = bias_add1.x50_0_addr1; - x5.write_data = bias_add1.x50_0_write_data; - x5.write_en = bias_add1.x50_0_write_en; - bias_add1.x50_0_done = x5.done; - bias_add1.go = 1'd1; - run_bias_add1[done] = bias_add1.done ? 1'd1; - } - group run_relu1 { - x5.addr0 = relu1.x50_0_addr0; - relu1.x50_0_read_data = x5.read_data; - x5.addr1 = relu1.x50_0_addr1; - x6.addr0 = relu1.x60_0_addr0; - x6.addr1 = relu1.x60_0_addr1; - x6.write_data = relu1.x60_0_write_data; - x6.write_en = relu1.x60_0_write_en; - relu1.x60_0_done = x6.done; - relu1.go = 1'd1; - run_relu1[done] = relu1.done ? 1'd1; - } - group run_dense2 { - x6.addr0 = dense2.x60_0_addr0; - dense2.x60_0_read_data = x6.read_data; - x6.addr1 = dense2.x60_0_addr1; - fc3_weight.addr0 = dense2.fc3_weight0_0_addr0; - dense2.fc3_weight0_0_read_data = fc3_weight.read_data; - fc3_weight.addr1 = dense2.fc3_weight0_0_addr1; - x7.addr0 = dense2.x70_0_addr0; - x7.addr1 = dense2.x70_0_addr1; - x7.write_data = dense2.x70_0_write_data; - x7.write_en = dense2.x70_0_write_en; - dense2.x70_0_done = x7.done; - dense2.go = 1'd1; - run_dense2[done] = dense2.done ? 1'd1; - } - group run_bias_add2 { - x7.addr0 = bias_add2.x70_0_addr0; - bias_add2.x70_0_read_data = x7.read_data; - x7.addr1 = bias_add2.x70_0_addr1; - fc3_bias.addr0 = bias_add2.fc3_bias0_addr0; - bias_add2.fc3_bias0_read_data = fc3_bias.read_data; - x8.addr0 = bias_add2.x80_0_addr0; - x8.addr1 = bias_add2.x80_0_addr1; - x8.write_data = bias_add2.x80_0_write_data; - x8.write_en = bias_add2.x80_0_write_en; - bias_add2.x80_0_done = x8.done; - bias_add2.go = 1'd1; - run_bias_add2[done] = bias_add2.done ? 
1'd1; - } - group run_softmax { - x8.addr0 = softmax0.x80_0_addr0; - softmax0.x80_0_read_data = x8.read_data; - x8.addr1 = softmax0.x80_0_addr1; - x9.addr0 = softmax0.x90_0_addr0; - x9.addr1 = softmax0.x90_0_addr1; - x9.write_data = softmax0.x90_0_write_data; - x9.write_en = softmax0.x90_0_write_en; - softmax0.x90_0_done = x9.done; - softmax0.go = 1'd1; - run_softmax[done] = softmax0.done ? 1'd1; - } - } - control { - seq { - run_batch_flatten; - run_dense; - run_bias_add; - run_relu; - run_dense1; - run_bias_add1; - run_relu1; - run_dense2; - run_bias_add2; - run_softmax; - } - } -} diff --git a/frontends/relay-futil/tests/mlp_net.relay b/frontends/relay-futil/tests/mlp_net.relay deleted file mode 100644 index 4368b51016..0000000000 --- a/frontends/relay-futil/tests/mlp_net.relay +++ /dev/null @@ -1,16 +0,0 @@ -v0.0.4 -fn (%data: Tensor[(1, 1, 28, 28), float32], %fc1_weight: Tensor[(128, 784), float32], %fc1_bias: Tensor[(128), float32], - %fc2_weight: Tensor[(64, 128), float32], %fc2_bias: Tensor[(64), float32], %fc3_weight: Tensor[(10, 64), float32], - %fc3_bias: Tensor[(10), float32]) -> Tensor[(1, 10), float32] { - let %x: Tensor[(1, 784), float32] = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), float32] */; - let %x1: Tensor[(1, 128), float32] = nn.dense(%x, %fc1_weight, units=128) /* ty=Tensor[(1, 128), float32] */; - let %x2: Tensor[(1, 128), float32] = nn.bias_add(%x1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), float32] */; - let %x3: Tensor[(1, 128), float32] = nn.relu(%x2) /* ty=Tensor[(1, 128), float32] */; - let %x4: Tensor[(1, 64), float32] = nn.dense(%x3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), float32] */; - let %x5: Tensor[(1, 64), float32] = nn.bias_add(%x4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), float32] */; - let %x6: Tensor[(1, 64), float32] = nn.relu(%x5) /* ty=Tensor[(1, 64), float32] */; - let %x7: Tensor[(1, 10), float32] = nn.dense(%x6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), float32] */; - let %x8: Tensor[(1, 10), float32] = 
nn.bias_add(%x7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), float32] */; - let %x9: Tensor[(1, 10), float32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), float32] */; - %x9 -} From 89cda694dd377dcd267af08d36f76a250a555dd9 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Mon, 7 Dec 2020 22:02:13 -0500 Subject: [PATCH 71/75] Cleanup. --- frontends/relay-futil/compiler.py | 82 +++++------------ frontends/relay-futil/dahlia_functions.py | 69 +++++++++++---- frontends/relay-futil/futil_ast.py | 59 ++++--------- frontends/relay-futil/pretty_print.py | 80 +++++++++++------ frontends/relay-futil/utilities.py | 103 +++++++++++----------- 5 files changed, 197 insertions(+), 196 deletions(-) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 4b8b5c22e8..4dd0de566e 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -9,19 +9,6 @@ from futil_ast import * from dahlia_functions import * -# Mapping from Relay binary calls to the respective Dahlia operator. -BuiltInBinaryOps = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} - -# Mapping from Relay function names to their respective Dahlia lowering. -RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, - 'nn.bias_add': bias_add, 'nn.relu': relu, 'nn.softmax': softmax, 'nn.max_pool2d': max_pool2d, - 'nn.conv2d': conv2d, 'negative': negative, 'expand_dims': expand_dims, 'sqrt': sqrt} - -# Mapping between primitive type and associated Dahlia name extension. -# E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. 
-DahliaNameExtension = {PrimitiveType.Memory1D: '0', PrimitiveType.Memory2D: '0_0', - PrimitiveType.Memory3D: '0_0_0', PrimitiveType.Memory4D: '0_0_0_0'} - class Relay2Futil(ExprFunctor): """The main compilation visitor.""" @@ -30,17 +17,16 @@ def __init__(self): super(Relay2Futil, self).__init__() self.id_dictionary = defaultdict(int) self.relay_id_dictionary = defaultdict(int) - self.dahlia_components = [] - self.main = FComponent(name="main", cells=[], wires=[]) + self.main = FComponent(name="main") def id(self, name): """ Provides a unique identification for a given name. For example, if 'a' is seen three times, it will produce: 'a0', 'a1', 'a2'. """ - id_number = self.id_dictionary[name] + id_number = str(self.id_dictionary[name]) self.id_dictionary[name] += 1 - return name + str(id_number) + return ''.join((name, id_number)) def relay_id(self, name): """ @@ -54,7 +40,7 @@ def relay_id(self, name): id_number = self.relay_id_dictionary[name] self.relay_id_dictionary[name] += 1 if id_number == 0: return name - return name + str(id_number) + return ''.join((name, str(id_number))) def dahlia_name(self, name, type): """ @@ -64,69 +50,47 @@ def dahlia_name(self, name, type): Memory3D: `X0_0_0`, `Y0_0_0` """ assert type in DahliaNameExtension, f'{name} with {type} is not supported yet.' - return name + DahliaNameExtension[type] + return ''.join((name, DahliaNameExtension[type])) - def get_dahlia_declaration(self, function_name, cells, args, attrs): - """ - Returns the corresponding name, Dahlia function type, and op (if it is a binary op, otherwise None). - If the function type isn't supported, fails with an assertion. 
- """ - input_type = cells[0].primitive.type - function = name = op = None - if function_name in BuiltInBinaryOps: - op = BuiltInBinaryOps[function_name] - function, name = broadcast, function_name - elif function_name in RelayFunctionCalls: - function = RelayFunctionCalls[function_name] - name = function.__name__ - else: - assert False, f'{function_name} is not supported for lowering to FuTIL.' - return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), - op=op, inputs=args, attributes=attrs, function=function) - - def visit_var(self, var): + def visit_var(self, var) -> FCell: name = self.relay_id(var.name_hint) - # Do not add duplicate primitives to main. - if self.main.contains_primitive(name): return cell + if name in self.main.cells: return cell data, type, data_type = get_memory_parameters(var.type_annotation) - dahlia_name = self.dahlia_name(name, type) - return FCell(dahlia_name=dahlia_name, + return FCell(dahlia_name=self.dahlia_name(name, type), primitive=FPrimitive(name=name, data=data, data_type=data_type, type=type)) def visit_let(self, let): values, output = self.visit(let.value), self.visit(let.var) if isinstance(values, list): for value in values: - if not value.is_dahlia_declaration(): continue - value.dahlia_declaration.output = output - value.dahlia_declaration.invoke() + if value.is_relay_function(): value.relay_function.output = output return [self.visit(let.body), values] - def visit_constant(self, const): + def visit_constant(self, const) -> FCell: # Note: We're currently treating constants defined in a `let` statement in Relay IR as 1D Memory. 
- type, shape = const.data.dtype, const.data.shape - name, data = self.id("const"), [get_bitwidth(type), int(const.data.asnumpy())] - data_type = get_memory_parameters(type) - return FCell(primitive=FPrimitive(name=name, data=data, data_type=data_type, type=PrimitiveType.Constant)) + # type, shape = const.data.dtype, const.data.shape + pass - def visit_call(self, call): + def visit_call(self, call) -> List[FCell]: attributes = call.attrs cells, args = [], [] for arg in call.args: argument = self.visit(arg) cells.append(argument) args.append(argument) - cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, args, call.attrs))) + # We are representing all function calls in Relay IR at the Dahlia level, which will then be lowered to FuTIL. + # Note, the Relay function's output is not defined until the `let` statement is visited. + function, name, op = GetRelayFunctionCall(call.op.name) + relay_function_call = RelayFunctionCall(component_name=self.relay_id(name), name=self.id(name), op=op, + inputs=args, attributes=call.attrs, lowering_function=function) + cells.append(FCell(relay_function=relay_function_call)) return cells def visit_function(self, function): body = self.visit(function.body) - for cell in flatten(body): - self.main.add_cell(cell) - if not cell.is_dahlia_declaration(): continue - self.dahlia_components.append(cell.dahlia_declaration.program) + for cell in flatten(body): self.main.add_cell(cell) build_main_controls(self.main) - return pp_component(self.main) + return pp_lowered_relay_function(self.main) def relay_transforms(expr: Function) -> Function: @@ -149,9 +113,7 @@ def lower_to_futil(program) -> str: PREAMBLE = """import "primitives/std.lib";\n""" MAIN = visitor.visit(program) - DAHLIA_COMPONENTS = '\n'.join(visitor.dahlia_components) - NEWL = '\n' - return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}' + return '\n'.join((PREAMBLE, MAIN)) if __name__ == '__main__': diff --git 
a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py index 1822c45c4f..a3af6206e6 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_functions.py @@ -7,11 +7,11 @@ IMPORT_STATEMENT = """import "primitives/std.lib";\n""" NO_ERR = "2>/dev/null" -CHARACTER_I = chr(ord('i')) NEWL = '\n' +CHARACTER_I = chr(ord('i')) # Starting index variable name for Dahlia array iteration. -def lower_dahlia_program(prog, component_name): +def LowerDahliaProgramToFuTIL(program, component_name): """ Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are @@ -35,7 +35,7 @@ def lower_dahlia_program(prog, component_name): ... } """ - program_string = '\n'.join(prog.splitlines()) + program_string = '\n'.join(program.splitlines()) with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: tf0.write(bytes(program_string, 'UTF-8')) tf0.seek(0), tf1.seek(0), tf2.seek(0) @@ -48,6 +48,10 @@ def lower_dahlia_program(prog, component_name): return component +#################################################################################################### +################################ Dahlia Implementations ############################################ +#################################################################################################### + def broadcast(declaration): """ https://numpy.org/doc/stable/user/basics.broadcasting.html @@ -110,7 +114,7 @@ def broadcast(declaration): program_body = pp_dahlia_loop(res, loop_body) declarations = pp_dahlia_memory_declarations([res, op1, op2]) program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(program, declaration.component_name) def batch_flatten(declaration): @@ -134,7 +138,7 @@ def 
batch_flatten(declaration): body = f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;" program_body = pp_dahlia_loop(data, body) program = f"""{declarations}{NEWL}{let_flattened}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(program, declaration.component_name) def bias_add(declaration): @@ -158,7 +162,7 @@ def bias_add(declaration): declarations = pp_dahlia_memory_declarations([data, bias, res]) body = (f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};") program_body = pp_dahlia_loop(data, body) - return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) + return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) # TODO(cgyurgyik): @@ -184,7 +188,8 @@ def relu(declaration): body = f"""if ({data.name}{indices} > zero) {{ {res.name}{indices} := {data.name}{indices}; }} else {{ {res.name}{indices} := zero; }}""" program_body = pp_dahlia_loop(data, body) - return lower_dahlia_program(f"""{declarations}{NEWL}{let_zero}{NEWL}{program_body}""", declaration.component_name) + return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{let_zero}{NEWL}{program_body}""", + declaration.component_name) # TODO(cgyurgyik): Similar to ReLU, this requires signed operands. @@ -203,12 +208,11 @@ def negative(declaration): declarations = pp_dahlia_memory_declarations([op, res]) zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := {zero} - {op.name}{indices};""") - return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) + return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) -# TODO(cgyurgyik): Similar to ReLU, this requires signed operands. 
def sqrt(declaration): - """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.sqrt""" op, res = declaration.inputs[0].primitive, declaration.output.primitive bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type include_sqrt = f"""import "fxp_sqrt.h" {{ def sqrt(value: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" @@ -222,8 +226,8 @@ def sqrt(declaration): declarations = pp_dahlia_memory_declarations([op, res]) program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := sqrt({op.name}{indices});""") - return lower_dahlia_program(f"""{include_sqrt}{NEWL}{declarations}{NEWL}{program_body}""", - declaration.component_name) + return LowerDahliaProgramToFuTIL(f"""{include_sqrt}{NEWL}{declarations}{NEWL}{program_body}""", + declaration.component_name) def expand_dims(declaration): @@ -246,8 +250,7 @@ def expand_dims(declaration): variable_name = next_character(variable_name) program_body = pp_dahlia_loop(data, f'{res.name}{res_indices} := {data.name}{data_indices}') - program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) def batch_matmul(declaration): @@ -294,7 +297,7 @@ def batch_matmul(declaration): }} }} """ - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(program, declaration.component_name) # TODO(cgyurgyik): Similar to batch_matmul, this requires a temporary memory to store the output @@ -331,7 +334,7 @@ def dense(declaration): }} }} """ - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(program, declaration.component_name) # TODO(cgyurgyik): Currently, only supports a small subset (namely those used in our VGG net and MLP net examples). 
@@ -362,7 +365,8 @@ def softmax(declaration): }} """ program = f"""{import_exp}{NEWL}{declarations}{body}""" - return lower_dahlia_program(program, declaration.component_name) + + return LowerDahliaProgramToFuTIL(program, declaration.component_name) def max_pool2d(declaration): @@ -403,7 +407,7 @@ def max_pool2d(declaration): }} """ program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(program, declaration.component_name) # Only supports a small subset of the `conv2d` function. For example, @@ -443,4 +447,31 @@ def conv2d(declaration): }} """ program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(program, declaration.component_name) + + +# Mapping from Relay function names to their respective Dahlia lowering. +RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, + 'nn.bias_add': bias_add, 'nn.relu': relu, 'nn.softmax': softmax, 'nn.max_pool2d': max_pool2d, + 'nn.conv2d': conv2d, 'negative': negative, 'expand_dims': expand_dims, 'sqrt': sqrt} + +# Mapping from Relay binary calls to the respective Dahlia operator. +BuiltInBinaryOps = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} + + +def GetRelayFunctionCall(function_name) -> RelayFunctionCall: + """ + Returns the corresponding name, function, and op (if it is a binary op, otherwise None). + If the function isn't supported, fails with an assertion. + """ + function = name = op = None + assert function_name in BuiltInBinaryOps or function_name in RelayFunctionCalls, \ + f'{function_name} is not supported for lowering from Relay IR to FuTIL.' 
+ if function_name in BuiltInBinaryOps: + op = BuiltInBinaryOps[function_name] + function = broadcast + name = function_name + else: + function = RelayFunctionCalls[function_name] + name = function.__name__ + return function, name, op diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 03f2e8fa0e..e1194a826c 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -4,6 +4,7 @@ from types import FunctionType from enum import Enum, IntEnum + # Note: The integer value N for Memory with dimension N is used; these should remain unchanged. class PrimitiveType(IntEnum): Memory1D = 1 @@ -110,65 +111,43 @@ class FComponent: Represents a FuTIL component. ''' name: str - cells: List[Cell] # Instantiated sub-components. - wires: List[FConnection] # Wire connections between components. + wires = [] # Wire connections between components. + cells = {} # Instantiated sub-components. This is a mapping from {`dahlia_name`, FCell}. controls: FControl = None # Control statement for this component. signature: FSignature = None # Input and output ports. - def contains_primitive(self, name: str): - ''' - Determines whether this component contains a primitive with the given name. - ''' - # TODO(cgyurgyik): Rethink data structure here. - for cell in self.cells: - if not cell.is_primitive(): continue - if cell.primitive.name == name: return True - return False - def add_cell(self, subcomponent: Cell): ''' Appends a subcomponent to this component's list of FuTIL cells. 
''' - if not subcomponent.is_primitive(): - self.cells.append(subcomponent) - return - if self.contains_primitive(subcomponent.primitive.name): return - self.cells.append(subcomponent) + if subcomponent == None: return + if subcomponent.is_primitive(): + self.cells[subcomponent.primitive.name] = subcomponent + elif subcomponent.is_relay_function(): + self.cells[subcomponent.relay_function.name] = subcomponent @dataclass -class DahliaDeclaration: - decl_name: str +class RelayFunctionCall: + """ + Represents a Relay function call. This will eventually be translated to Dahlia and subsequently lowered to FuTIL. + """ + name: str component_name: str - op: str = None + op: str = None # Binary operation associated with the Relay function call, if it exists. + attributes: tvm.ir.Attrs = None # Attributes associated with the Relay function call, e.g. `axis`, `padding`. + lowering_function: FunctionType = None # The function used to convert the Dahlia representation to FuTIL. inputs: List[Cell] = None output: Cell = None - attributes: tvm.ir.Attrs = None - function: FunctionType = None - program: str = None - - def invoke(self): - self.program = self.function(self) - - -@dataclass -class FDeclaration: - ''' - Represents a FuTIL declaration. - ''' - name: str - component: FComponent = None @dataclass class FCell(Cell): dahlia_name: str = None primitive: FPrimitive = None - declaration: FDeclaration = None - dahlia_declaration: DahliaDeclaration = None + relay_function: RelayFunctionCall = None + # TODO(cgyurgyik): Is there a better way to do this, such as std::variant in C++? 
def is_primitive(self): return self.primitive != None - def is_declaration(self): return self.declaration != None - - def is_dahlia_declaration(self): return self.dahlia_declaration != None + def is_relay_function(self): return self.relay_function != None diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 7a57e6e6b7..fa3935f6e4 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -2,14 +2,14 @@ import textwrap -def mk_block(decl, contents, indent=2): +def pp_block(decl, contents, indent=2): """Format a block like this: decl { contents } where `decl` is one line but contents can be multiple lines. """ - return decl + ' {\n' + textwrap.indent(contents, indent * ' ') + '\n}' + return ''.join((decl, ' {\n', textwrap.indent(contents, indent * ' '), '\n}')) def pp_component_signature(component: FComponent): @@ -39,7 +39,7 @@ def pp_connections(component: FConnection): wires = [] for wire in connection.group.wires: wires.append(pp_wire(wire)) - connections.append(mk_block(f'group {connection.group.name}', '\n'.join(wires))) + connections.append(pp_block(f'group {connection.group.name}', '\n'.join(wires))) return connections @@ -49,24 +49,57 @@ def pp_control(component: FComponent): groups = [] for group_name in control.stmts: groups.append(f'{group_name};') - ctrls.append(mk_block(control.name, '\n'.join(groups))) + ctrls.append(pp_block(control.name, '\n'.join(groups))) return ctrls -def pp_component(component: FComponent): +def pp_lowered_dahlia_components(component: FComponent): + relay_functions = [] + for cell in component.cells.values(): + if cell == None or not cell.is_relay_function(): continue + relay_call = cell.relay_function + relay_functions.append(relay_call.lowering_function(relay_call)) + return '\n'.join(relay_functions) + + +def pp_lowered_relay_function(component: FComponent): + """ + Pretty prints the main program. This consists of the following: + 1. 
Relay functions lowered from Dahlia -> FuTIL. + 2. The `main` component. + + Example: + ------------------------------------ + Input + ``` + fn (%x: int32, %y: int32) { let %z = add(%x, %y); %z } + ``` + ------------------------------------ + Output + ``` + component add(...) -> (...) { ... } + + component main() -> () { + ... + control { run_add; } + } + ``` + """ + relay_function_components = pp_lowered_dahlia_components(component) + subcomponents = [] - for cell in component.cells: - if cell == None: - continue + for cell in component.cells.values(): + if cell == None: continue subcomponents.append(pp_cell(cell)) - cells = mk_block("cells", '\n'.join(subcomponents)) + cells = pp_block("cells", '\n'.join(subcomponents)) inputs, outputs = pp_component_signature(component) - wires = mk_block("wires", '\n'.join(pp_connections(component))) - - controls = "" if component.controls == None else '\n'.join(pp_control(component)) - control = mk_block("control", controls) + wires = pp_block("wires", '\n'.join(pp_connections(component))) - return mk_block(f'component {component.name} ({inputs}) -> ({outputs})', '\n'.join([cells, wires, control])) + controls = '\n'.join(pp_control(component)) + control = pp_block("control", controls) + main_component = pp_block(f'component {component.name} ({inputs}) -> ({outputs})', + '\n'.join([cells, wires, control])) + return '\n'.join((relay_function_components, main_component)) def pp_cell(cell: FCell): @@ -100,11 +133,8 @@ def pp_cell(cell: FCell): if cell.primitive.type == PrimitiveType.BinOp: op = data[1] return f'{cell.primitive.name} = prim std_{op}({bitwidth});' - assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' - elif cell.is_declaration(): - return f'{cell.declaration.name} = {cell.declaration.component.name};' - elif cell.is_dahlia_declaration(): - return f'{cell.dahlia_declaration.decl_name} = {cell.dahlia_declaration.component_name};' + if cell.is_relay_function(): return 
f'{cell.relay_function.name} = {cell.relay_function.component_name};' + assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' # Dahlia Pretty Printing. @@ -112,17 +142,17 @@ def pp_cell(cell: FCell): def next_character(ch, dir=1): """ Returns the next character after 'ch'. - If dir is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. + If `dir` is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. """ - return chr(ord(ch) + dir) if dir > 0 else chr(ord(ch) - 1) + return chr(ord(ch) + 1) if dir > 0 else chr(ord(ch) - 1) def pp_dahlia_memory_declarations(declaration_list): declarations = [] - for decl in declaration_list: - decl_string = f'decl {decl.name}: {decl.data_type}<{decl.data[0]}>' - for i in range(0, decl.type): decl_string += f'[{decl.data[i + 1]}]' - declarations.append(f'{decl_string};') + for declaration in declaration_list: + string = f'decl {declaration.name}: {declaration.data_type}<{declaration.data[0]}>' + for i in range(0, declaration.type): string += f'[{declaration.data[i + 1]}]' + declarations.append(string + ";") return '\n'.join(declarations) diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index fc171f60e9..69cc8c4e0a 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -6,6 +6,11 @@ NumDimensionsToPrimitive = {1: PrimitiveType.Memory1D, 2: PrimitiveType.Memory2D, 3: PrimitiveType.Memory3D, 4: PrimitiveType.Memory4D} +# Mapping between primitive type and associated Dahlia name extension. +# E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. +DahliaNameExtension = {PrimitiveType.Memory1D: '0', PrimitiveType.Memory2D: '0_0', + PrimitiveType.Memory3D: '0_0_0', PrimitiveType.Memory4D: '0_0_0_0'} + def flatten(l): ''' @@ -26,8 +31,9 @@ def get_dahlia_data_type(relay_type): Gets the Dahlia data type from the given Relay type. 
NOTE: Currently, Dahlia does not support signed types for arrays. ''' - if 'int' in relay_type: return 'ubit' - if 'float' in relay_type: return 'ufix' + dtype = relay_type.dtype + if 'int' in dtype: return 'ubit' + if 'float' in dtype: return 'ufix' assert False, f'{relay_type} is not supported.' @@ -37,12 +43,12 @@ def get_bitwidth(relay_type): If the relay_type is floating point of size N, returns a fixed point of size <N, N/2>. This lowers to a fixed point cell with `int_width` of size N/2, and a `fract_width` of size N/2. ''' - type = str(relay_type) - length = len(type) - if 'int' in type: return type[3:length] - if 'float' in type: - width = int(type[5:length]) - return f'{width}, {int(width / 2)}' + dtype = relay_type.dtype + length = len(dtype) + if 'int' in dtype: return dtype[3:length] + if 'float' in dtype: + width = dtype[5:length] + return f'{width}, {int(width) // 2}' assert False, f'{relay_type} is not supported.' @@ -55,16 +61,16 @@ def get_memory_parameters(type): We then parse this to determine the corresponding FuTIL and Dahlia types. ''' - t = str(type) - data_type = get_dahlia_data_type(t) - if t[0:3] == 'int' or t[0:5] == 'float': + typ = str(type) + data_type = get_dahlia_data_type(type) + + if typ[0:3] == 'int' or typ[0:5] == 'float': + # Currently, we are treating scalar values as 1D Memory primitives. return [get_bitwidth(type), 1, 1], PrimitiveType.Memory1D, data_type - assert t[0:6] == 'Tensor', f'{type} is not currently supported.' - string_type = t[t.find(")") + 3:t.find("]")] - string_dimensions = t[t.find("(") + 1:t.find(")")] + assert typ[0:6] == 'Tensor', f'{type} is not currently supported.' 
- tensor_dimensions = list(map(int, string_dimensions.split(','))) - data, num_dimensions = [get_bitwidth(string_type)], len(tensor_dimensions) + tensor_dimensions = type.concrete_shape + data, num_dimensions = [get_bitwidth(type)], len(tensor_dimensions) assert num_dimensions in NumDimensionsToPrimitive, f'{num_dimensions} dimensions is not supported.' for dimension in tensor_dimensions: data.append(dimension) # Size. for dimension in tensor_dimensions: data.append(int(math.log2(dimension) + 1)) # Index size. @@ -73,51 +79,44 @@ def get_memory_parameters(type): def build_main_controls(c: FComponent): ''' - Builds the wires and control for the `main` component. - This is done by creating a group run_* with its respective - wiring for each Dahlia declaration, and adding it to the - control. + Builds the wires and control for the `main` component. This is done by creating a group `run_*` + with its respective wiring for each Relay function call, and adding it to the control. ''' - dahlia_declarations = [] - for cell in reversed(c.cells): - if not cell.is_dahlia_declaration(): continue - dahlia_declarations.append(cell.dahlia_declaration) - - for declaration in dahlia_declarations: - inputs = declaration.inputs + for cell in reversed(c.cells.values()): + if not cell.is_relay_function(): continue + function = cell.relay_function + inputs, output = function.inputs, function.output wires = [] - group_name = f'run_{declaration.component_name}' + group_name = f'run_{function.component_name}' for input in flatten(inputs): prim = input.primitive - wires.append(FWire(f'{prim.name}.addr0', f'{declaration.decl_name}.{input.dahlia_name}_addr0')) + wires.append(FWire(f'{prim.name}.addr0', f'{function.name}.{input.dahlia_name}_addr0')) wires.append( - FWire(f'{declaration.decl_name}.{input.dahlia_name}_read_data', f'{prim.name}.read_data')) + FWire(f'{function.name}.{input.dahlia_name}_read_data', f'{prim.name}.read_data')) if prim.type == PrimitiveType.Memory1D: continue - 
wires.append(FWire(f'{prim.name}.addr1', f'{declaration.decl_name}.{input.dahlia_name}_addr1')) + wires.append(FWire(f'{prim.name}.addr1', f'{function.name}.{input.dahlia_name}_addr1')) if prim.type == PrimitiveType.Memory2D: continue - wires.append(FWire(f'{prim.name}.addr2', f'{declaration.decl_name}.{input.dahlia_name}_addr2')) + wires.append(FWire(f'{prim.name}.addr2', f'{function.name}.{input.dahlia_name}_addr2')) if prim.type == PrimitiveType.Memory3D: continue - wires.append(FWire(f'{prim.name}.addr3', f'{declaration.decl_name}.{input.dahlia_name}_addr3')) - - output = declaration.output - type = output.primitive.type - wires.append(FWire(f'{output.primitive.name}.addr0', f'{declaration.decl_name}.{output.dahlia_name}_addr0')) - if type == PrimitiveType.Memory2D or type == PrimitiveType.Memory3D or type == PrimitiveType.Memory4D: - wires.append(FWire(f'{output.primitive.name}.addr1', f'{declaration.decl_name}.{output.dahlia_name}_addr1')) - if type == PrimitiveType.Memory3D or type == PrimitiveType.Memory4D: - wires.append(FWire(f'{output.primitive.name}.addr2', f'{declaration.decl_name}.{output.dahlia_name}_addr2')) - if type == PrimitiveType.Memory4D: - wires.append(FWire(f'{output.primitive.name}.addr3', f'{declaration.decl_name}.{output.dahlia_name}_addr3')) - - wires.append( - FWire(f'{output.primitive.name}.write_data', f'{declaration.decl_name}.{output.dahlia_name}_write_data')) - wires.append( - FWire(f'{output.primitive.name}.write_en', f'{declaration.decl_name}.{output.dahlia_name}_write_en')) - wires.append(FWire(f'{declaration.decl_name}.{output.dahlia_name}_done', f'{output.primitive.name}.done')) - wires.append(FWire(f'{declaration.decl_name}.go', "1'd1")) - wires.append(FWire(f'{group_name}[done]', f"{declaration.decl_name}.done ? 
1'd1")) + wires.append(FWire(f'{prim.name}.addr3', f'{function.name}.{input.dahlia_name}_addr3')) + + output_type, output_name = output.primitive.type, output.primitive.name + for i in range(0, 1): + wires.append(FWire(f'{output_name}.addr0', f'{function.name}.{output.dahlia_name}_addr0')) + if output_type == PrimitiveType.Memory1D: break + wires.append(FWire(f'{output_name}.addr1', f'{function.name}.{output.dahlia_name}_addr1')) + if output_type == PrimitiveType.Memory2D: break + wires.append(FWire(f'{output_name}.addr2', f'{function.name}.{output.dahlia_name}_addr2')) + if output_type == PrimitiveType.Memory3D: break + wires.append(FWire(f'{output_name}.addr3', f'{function.name}.{output.dahlia_name}_addr3')) + + wires.append(FWire(f'{output_name}.write_data', f'{function.name}.{output.dahlia_name}_write_data')) + wires.append(FWire(f'{output_name}.write_en', f'{function.name}.{output.dahlia_name}_write_en')) + wires.append(FWire(f'{function.name}.{output.dahlia_name}_done', f'{output_name}.done')) + wires.append(FWire(f'{function.name}.go', "1'd1")) + wires.append(FWire(f'{group_name}[done]', f"{function.name}.done ? 1'd1")) c.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) - # Ensures that only group names make it into the controls of a component. + # Ensures that only group names make it into the controls of a FuTIL component. connections = list(filter(lambda w: w.is_group(), c.wires)) c.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] From cc13e7cc59ef48064559dca8dbecfc960b1908d3 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 8 Dec 2020 20:31:29 -0500 Subject: [PATCH 72/75] Continued modular efforts. 
--- frontends/relay-futil/compiler.py | 20 +- ...dahlia_functions.py => dahlia_lowering.py} | 217 ++++++++++-------- frontends/relay-futil/example.py | 1 + frontends/relay-futil/pretty_print.py | 51 ---- frontends/relay-futil/tests/add.expect | 28 +-- .../relay-futil/tests/batch_flatten.expect | 30 +-- .../relay-futil/tests/batch_matmul.expect | 40 ++-- frontends/relay-futil/tests/bias_add.expect | 40 ++-- frontends/relay-futil/tests/broadcast.expect | 38 +-- frontends/relay-futil/tests/conv2d.expect | 46 ++-- frontends/relay-futil/tests/dense.expect | 34 +-- .../relay-futil/tests/fixed_point_add.expect | 28 +-- frontends/relay-futil/tests/let1.expect | 28 +-- frontends/relay-futil/tests/let2.expect | 58 ++--- frontends/relay-futil/tests/let3.expect | 100 ++++---- frontends/relay-futil/tests/let3.relay | 2 +- frontends/relay-futil/tests/max_pool2d.expect | 36 +-- frontends/relay-futil/tests/relu.expect | 36 +-- frontends/relay-futil/tests/softmax.expect | 28 +-- frontends/relay-futil/tests/sqrt.expect | 36 +-- frontends/relay-futil/tests/sub.expect | 28 +-- .../relay-futil/tests/tensor1d_mult.expect | 28 +-- .../relay-futil/tests/tensor2d_add.expect | 34 +-- .../relay-futil/tests/tensor3d_divide.expect | 40 ++-- frontends/relay-futil/utilities.py | 10 +- 25 files changed, 508 insertions(+), 529 deletions(-) rename frontends/relay-futil/{dahlia_functions.py => dahlia_lowering.py} (72%) diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 4dd0de566e..d3eb4461d3 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -7,7 +7,7 @@ from pretty_print import * from utilities import * from futil_ast import * -from dahlia_functions import * +from dahlia_lowering import * class Relay2Futil(ExprFunctor): @@ -34,8 +34,8 @@ def relay_id(self, name): %x = foo(%y); %x1 = bar(%x); // Here, at this level, the name_hint associated with `x1` is still 'x'. 
- To avoid this, we provide Relay with its own identification dictionary. If 'x' is seen - three times, it will produce: 'x', 'x1', x2'. + To avoid this, we provide Relay with its own identification dictionary. + If 'x' is seen three times, it will produce: 'x', 'x1', x2'. """ id_number = self.relay_id_dictionary[name] self.relay_id_dictionary[name] += 1 @@ -62,7 +62,7 @@ def visit_var(self, var) -> FCell: def visit_let(self, let): values, output = self.visit(let.value), self.visit(let.var) if isinstance(values, list): - for value in values: + for value in flatten(values): if value.is_relay_function(): value.relay_function.output = output return [self.visit(let.body), values] @@ -81,8 +81,9 @@ def visit_call(self, call) -> List[FCell]: # We are representing all function calls in Relay IR at the Dahlia level, which will then be lowered to FuTIL. # Note, the Relay function's output is not defined until the `let` statement is visited. function, name, op = GetRelayFunctionCall(call.op.name) - relay_function_call = RelayFunctionCall(component_name=self.relay_id(name), name=self.id(name), op=op, - inputs=args, attributes=call.attrs, lowering_function=function) + component_name = self.id(name) + relay_function_call = RelayFunctionCall(component_name=component_name, name=f'comp_{component_name}', + op=op, inputs=args, attributes=call.attrs, lowering_function=function) cells.append(FCell(relay_function=relay_function_call)) return cells @@ -95,14 +96,13 @@ def visit_function(self, function): def relay_transforms(expr: Function) -> Function: """https://tvm.apache.org/docs/api/python/relay/transform.html""" - transform = tvm.transform.Sequential([ + transforms = tvm.transform.Sequential([ relay.transform.SimplifyExpr(), relay.transform.SimplifyInference(), - relay.transform.InferType() + relay.transform.InferType(), ]) mod = ir.IRModule.from_expr(expr) - mod['main'] = expr - mod = transform(mod) + mod = transforms(mod) return mod['main'] diff --git 
a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_lowering.py similarity index 72% rename from frontends/relay-futil/dahlia_functions.py rename to frontends/relay-futil/dahlia_lowering.py index a3af6206e6..3bb822e681 100644 --- a/frontends/relay-futil/dahlia_functions.py +++ b/frontends/relay-futil/dahlia_lowering.py @@ -3,7 +3,6 @@ from tempfile import NamedTemporaryFile, TemporaryFile from futil_ast import * -from pretty_print import * IMPORT_STATEMENT = """import "primitives/std.lib";\n""" NO_ERR = "2>/dev/null" @@ -11,7 +10,68 @@ CHARACTER_I = chr(ord('i')) # Starting index variable name for Dahlia array iteration. -def LowerDahliaProgramToFuTIL(program, component_name): +def next_character(ch, dir=1): + """ + Returns the next character after 'ch'. + If `dir` is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. + """ + return chr(ord(ch) + 1) if dir > 0 else chr(ord(ch) - 1) + + +def PPDahliaMemoryDeclarations(relay_function): + """ + Pretty print for Dahlia memory declarations, e.g. + `decl X: ubit<32> [1][10];` + """ + cell_list = relay_function.inputs + cell_list.append(relay_function.output) + + declarations = [] + for cell in cell_list: + declaration = cell.primitive + declaration_str = f'decl {declaration.name}: {declaration.data_type}<{declaration.data[0]}>' + for i in range(0, declaration.type): declaration_str += f'[{declaration.data[i + 1]}]' + declarations.append(declaration_str + ";") + return '\n'.join(declarations) + + +def PPDahliaLoop(relay_function, body, num_dimensions, data=None): + """ + Returns an iteration over data with `body` as the work done within the nested loop(s). + Many tensor functions share the same control flow: (1) Iterate `num_dimensions` times, and (2) do some work in body. 
+ For example, if `data` is a 2D primitive of size (M, N) and body == `X;`, then this will return: + + ``` + for (let i: ubit = 0..M) { + for (let j: ubit = 0..N) { + X; + } + } + ``` + + Notes: + If `data` is provided, it will be used to determine the `num_dimensions` as well as the corresponding bitwidths + and memory sizes. This occurs only in special cases; otherwise, the `output` of the `relay_function` will + determine these. + """ + variable_name = CHARACTER_I + program = [] + SPACING = '' + output = relay_function.output.primitive if data == None else data + for i in range(0, num_dimensions): + size, index_size = output.data[i + 1], output.data[i + num_dimensions + 1] + program.append(f'{SPACING}for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') + variable_name = next_character(variable_name) + SPACING += ' ' + program.append(f'{SPACING}{body}') + + for i in range(0, num_dimensions): + SPACING = SPACING[:-2] + program.append(SPACING + '}') + return '\n'.join(program) + + +def LowerDahliaProgramToFuTIL(relay_function, dahlia_body, dahlia_imports=None): """ Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are @@ -19,6 +79,7 @@ def LowerDahliaProgramToFuTIL(program, component_name): Example: ------ Dahlia, component name: ProcessX ------ + import "foo.h" { ... } decl X: ubit<32>[4]; ... @@ -35,13 +96,15 @@ def LowerDahliaProgramToFuTIL(program, component_name): ... 
} """ - program_string = '\n'.join(program.splitlines()) + if dahlia_imports == None: dahlia_imports = '' + program_string = '\n'.join((dahlia_imports, PPDahliaMemoryDeclarations(relay_function), dahlia_body)) + with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: tf0.write(bytes(program_string, 'UTF-8')) tf0.seek(0), tf1.seek(0), tf2.seek(0) fuse_binary = os.environ['DAHLIA_EXEC'] if 'DAHLIA_EXEC' in os.environ else 'fuse' command = f""" - {fuse_binary} {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {NO_ERR} \ + {fuse_binary} {tf0.name} --lower -b=futil -n={relay_function.component_name} > {tf1.name} {NO_ERR} \ && fud e --from futil {tf1.name} --to futil-externalize > {tf2.name} {NO_ERR}""" subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() component = tf2.read().decode()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. @@ -49,10 +112,10 @@ def LowerDahliaProgramToFuTIL(program, component_name): #################################################################################################### -################################ Dahlia Implementations ############################################ +################## Dahlia Implementations for Relay Function Calls ################################# #################################################################################################### -def broadcast(declaration): +def broadcast(function: RelayFunctionCall): """ https://numpy.org/doc/stable/user/basics.broadcasting.html Implements array broadcasting: @@ -72,8 +135,7 @@ def broadcast(declaration): result[i][j][k] := op1[i][0][k] op op2[j][0]; ... 
""" - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - + op1, op2, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive op1_dims, op2_dims, res_dims = op1.type, op2.type, res.type op1_sizes, op2_sizes, res_sizes = [], [], [] # Get memory sizes in reversed order. @@ -109,18 +171,15 @@ def broadcast(declaration): op1_index = ''.join(reversed(op1_indices)) op2_index = ''.join(reversed(op2_indices)) res_index = ''.join(reversed(res_indices)) - loop_body = f'{res.name}{res_index} := {op1.name}{op1_index} {declaration.op} {op2.name}{op2_index};' + loop_body = f'{res.name}{res_index} := {op1.name}{op1_index} {function.op} {op2.name}{op2_index};' - program_body = pp_dahlia_loop(res, loop_body) - declarations = pp_dahlia_memory_declarations([res, op1, op2]) - program = f"""{declarations}{NEWL}{program_body}""" - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(function, PPDahliaLoop(function, loop_body, num_dimensions=res_dims)) -def batch_flatten(declaration): +def batch_flatten(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" - data, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = data.data[0], data.type + data, res = function.inputs[0].primitive, function.output.primitive + bitwidth, num_dimensions = res.data[0], data.type res_index_size1 = res.data[4] variable_name = CHARACTER_I @@ -133,20 +192,18 @@ def batch_flatten(declaration): variable_name = next_character(variable_name) res_indices += f'[{variable_name}]' - declarations = pp_dahlia_memory_declarations([data, res]) let_flattened = f'let {variable_name}: ubit<{res_index_size1}> = 0;' body = f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;" - program_body = pp_dahlia_loop(data, body) - program = 
f"""{declarations}{NEWL}{let_flattened}{NEWL}{program_body}""" - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + program_body = '\n'.join((let_flattened, PPDahliaLoop(function, body, num_dimensions, data))) + return LowerDahliaProgramToFuTIL(function, program_body) -def bias_add(declaration): +def bias_add(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" - data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + data, bias, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive bitwidth, num_dimensions = data.data[0], data.type - axis_attribute = declaration.attributes.get_int("axis") + axis_attribute = function.attributes.get_int("axis") axis = num_dimensions - 1 if axis_attribute == -1 else axis_attribute variable_name = CHARACTER_I @@ -159,22 +216,19 @@ def bias_add(declaration): data_indices += index variable_name = next_character(variable_name) - declarations = pp_dahlia_memory_declarations([data, bias, res]) - body = (f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};") - program_body = pp_dahlia_loop(data, body) - return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) + body = f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};" + return LowerDahliaProgramToFuTIL(function, PPDahliaLoop(function, body, num_dimensions)) # TODO(cgyurgyik): # 1. This won't work for fixed point currently, since Dahlia # will not take fixed point operands for the `>` operator. # 2. Without signed bit array support, this is also meaningless. 
-def relu(declaration): +def relu(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" - data, res = declaration.inputs[0].primitive, declaration.output.primitive + data, res = function.inputs[0].primitive, function.output.primitive bitwidth, num_dimensions, data_type = data.data[0], data.type, data.data_type - declarations = pp_dahlia_memory_declarations([data, res]) zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' let_zero = f'let zero: {data_type}<{bitwidth}> = {zero};' @@ -186,16 +240,15 @@ def relu(declaration): variable_name = next_character(variable_name) body = f"""if ({data.name}{indices} > zero) {{ {res.name}{indices} := {data.name}{indices}; }} - else {{ {res.name}{indices} := zero; }}""" - program_body = pp_dahlia_loop(data, body) - return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{let_zero}{NEWL}{program_body}""", - declaration.component_name) + else {{ {res.name}{indices} := zero; }}""" + program_body = '\n'.join((let_zero, PPDahliaLoop(function, body, num_dimensions))) + return LowerDahliaProgramToFuTIL(function, program_body) # TODO(cgyurgyik): Similar to ReLU, this requires signed operands. 
-def negative(declaration): +def negative(function): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" - op, res = declaration.inputs[0].primitive, declaration.output.primitive + op, res = function.inputs[0].primitive, function.output.primitive bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type indices = "" @@ -205,15 +258,14 @@ def negative(declaration): indices += f'[{variable_name}]' variable_name = next_character(variable_name) - declarations = pp_dahlia_memory_declarations([op, res]) zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' - program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := {zero} - {op.name}{indices};""") - return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) + program_body = PPDahliaLoop(function, f"""{res.name}{indices} := {zero} - {op.name}{indices};""", num_dimensions) + return LowerDahliaProgramToFuTIL(function, program_body) -def sqrt(declaration): +def sqrt(function): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.sqrt""" - op, res = declaration.inputs[0].primitive, declaration.output.primitive + op, res = function.inputs[0].primitive, function.output.primitive bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type include_sqrt = f"""import "fxp_sqrt.h" {{ def sqrt(value: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" @@ -224,20 +276,16 @@ def sqrt(declaration): indices += f'[{variable_name}]' variable_name = next_character(variable_name) - declarations = pp_dahlia_memory_declarations([op, res]) - program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := sqrt({op.name}{indices});""") - return LowerDahliaProgramToFuTIL(f"""{include_sqrt}{NEWL}{declarations}{NEWL}{program_body}""", - declaration.component_name) + program_body = PPDahliaLoop(function, f"""{res.name}{indices} := sqrt({op.name}{indices});""", num_dimensions) + return 
LowerDahliaProgramToFuTIL(function, program_body, include_sqrt) -def expand_dims(declaration): +def expand_dims(function): """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" - axis, num_newaxis = declaration.attributes.get_int("axis"), declaration.attributes.get_int("num_newaxis") - data, res = declaration.inputs[0].primitive, declaration.output.primitive + axis, num_newaxis = function.attributes.get_int("axis"), function.attributes.get_int("num_newaxis") + data, res = function.inputs[0].primitive, function.output.primitive bitwidth, num_dimensions = data.data[0], data.type - declarations = pp_dahlia_memory_declarations([data, res]) - res_indices, data_indices = "", "" variable_name = CHARACTER_I for i in range(0, num_dimensions): @@ -249,13 +297,13 @@ def expand_dims(declaration): for _ in range(0, num_newaxis): res_indices += '[0]' variable_name = next_character(variable_name) - program_body = pp_dahlia_loop(data, f'{res.name}{res_indices} := {data.name}{data_indices}') - return LowerDahliaProgramToFuTIL(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) + program_body = PPDahliaLoop(function, f'{res.name}{res_indices} := {data.name}{data_indices}', num_dimensions, data) + return LowerDahliaProgramToFuTIL(function, program_body) -def batch_matmul(declaration): +def batch_matmul(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + op1, op2, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive bitwidth, M1_size0, M1_size1, M1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] M1_index_size0, M1_index_size1, M1_index_size2 = op1.data[4], op1.data[5], op1.data[6] M2_size0, M2_size1, M2_size2 = op2.data[1], op2.data[2], op2.data[3] @@ -265,8 +313,7 @@ def batch_matmul(declaration): # 3. 
Copy temporary value to return value.* # * This third step may not be necessary, but trying to conduct the matrix multiply # directly with the return value declared resulted in incorrect outputs. - declarations = pp_dahlia_memory_declarations([res, op1, op2]) - program = f"""{declarations} + program_body = f""" let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size2}][{M2_size1}]; let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ @@ -297,19 +344,18 @@ def batch_matmul(declaration): }} }} """ - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(function, program_body) # TODO(cgyurgyik): Similar to batch_matmul, this requires a temporary memory to store the output # of the matrix multiply. Otherwise, the values aren't computed properly. Look deeper into this. -def dense(declaration): +def dense(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.dense""" - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + op1, op2, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive bitwidth, M1_size0, M1_size1 = op1.data[0], op1.data[1], op1.data[2] M1_index_size0, M1_index_size1 = op1.data[3], op1.data[4] M2_size0, M2_size1, M2_index_size0, M2_index_size1 = op2.data[1], op2.data[2], op2.data[3], op2.data[4] program = f""" - {pp_dahlia_memory_declarations([res, op1, op2])} let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size1}][{M2_size0}]; let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; for (let i: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ @@ -334,24 +380,22 @@ def dense(declaration): }} }} """ - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(function, program) # TODO(cgyurgyik): 
Currently, only supports a small subset (namely those used in our VGG net and MLP net examples). -def softmax(declaration): +def softmax(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.softmax""" - op, res = declaration.inputs[0].primitive, declaration.output.primitive - axis = declaration.attributes.get_int("axis") + op, res = function.inputs[0].primitive, function.output.primitive + axis = function.attributes.get_int("axis") data_type = op.data_type assert op.type == PrimitiveType.Memory2D, f'nn.softmax with pritmive type Memory{op.type}D is not supported.' assert axis == -1 or axis == 1, f'nn.softmax with axis = {axis} is not supported.' bitwidth, size0, size1, index_size0, index_size1 = op.data[0], op.data[1], op.data[2], op.data[3], op.data[4] import_exp = f"""import "std_exp.h" {{ def exp(x: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" - declarations = pp_dahlia_memory_declarations([res, op]) - zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' - body = f""" + program_body = f""" for (let i: ubit<{index_size0}> = 0..{size0}) {{ let {op.name}_expsum: {data_type}<{bitwidth}> = {zero}; for (let j: ubit<{index_size1}> = 0..{size1}) {{ @@ -364,25 +408,22 @@ def softmax(declaration): }} }} """ - program = f"""{import_exp}{NEWL}{declarations}{body}""" - - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(function, program_body, import_exp) -def max_pool2d(declaration): +def max_pool2d(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.max_pool2d""" - data, res = declaration.inputs[0].primitive, declaration.output.primitive + data, res = function.inputs[0].primitive, function.output.primitive - strides = declaration.attributes.get_int_tuple("strides") - pool_size = declaration.attributes.get_int_tuple("pool_size") - layout = declaration.attributes.get_str("layout") - ceil_mode = declaration.attributes.get_int("ceil_mode") + 
strides = function.attributes.get_int_tuple("strides") + pool_size = function.attributes.get_int_tuple("pool_size") + layout = function.attributes.get_str("layout") + ceil_mode = function.attributes.get_int("ceil_mode") assert layout == 'NCHW', f"Layout \'{layout}\' is not currently supported for nn.max_pool2d; please use `NCHW`" assert ceil_mode == False, "`ceil_mode` is not currently supported for nn.max_pool2d" bitwidth, data_type = data.data[0], data.data_type size0, size1, size2, size3 = res.data[1], res.data[2], res.data[3], res.data[4] - declarations = pp_dahlia_memory_declarations([res, data]) program_body = f""" for (let b: ubit<32> = 0..{size0}) {{ for (let c: ubit<32> = 0..{size1}) {{ @@ -406,24 +447,21 @@ def max_pool2d(declaration): }} }} """ - program = f"""{declarations}{NEWL}{program_body}""" - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(function, program_body) # Only supports a small subset of the `conv2d` function. For example, # dilation and grouped convolution are not supported. 
-def conv2d(declaration): +def conv2d(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.conv2d""" - data, weight, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive + data, weight, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive - strides = declaration.attributes.get_int_tuple("strides") - kernel_size = declaration.attributes.get_int_tuple("kernel_size") - channels = declaration.attributes.get_int("channels") + strides = function.attributes.get_int_tuple("strides") + kernel_size = function.attributes.get_int_tuple("kernel_size") + channels = function.attributes.get_int("channels") bitwidth, data_type = data.data[0], data.data_type size0, size1, size2, size3 = res.data[1], res.data[2], res.data[3], res.data[4] - declarations = pp_dahlia_memory_declarations([res, data, weight]) - zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' program_body = f""" for (let b: ubit<32> = 0..{size0}) {{ @@ -446,8 +484,7 @@ def conv2d(declaration): }} }} """ - program = f"""{declarations}{NEWL}{program_body}""" - return LowerDahliaProgramToFuTIL(program, declaration.component_name) + return LowerDahliaProgramToFuTIL(function, program_body) # Mapping from Relay function names to their respective Dahlia lowering. @@ -461,8 +498,8 @@ def conv2d(declaration): def GetRelayFunctionCall(function_name) -> RelayFunctionCall: """ - Returns the corresponding name, function, and op (if it is a binary op, otherwise None). - If the function isn't supported, fails with an assertion. + Returns the corresponding name, function, and `op` type (if it is a binary op, otherwise None) + of the Relay function call. If the function call isn't supported, fails with an assertion. 
""" function = name = op = None assert function_name in BuiltInBinaryOps or function_name in RelayFunctionCalls, \ diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 1028e7cb47..078e90f248 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -16,6 +16,7 @@ def tensor_subtract(): return relay.Function([x, y], relay.subtract(x, y)) +# Trying to read in a function that uses `expand_dims` with relay.fromtext() leads to some peculiar errors. def expand_dims(): x = relay.var('x', shape=[512], dtype='int32') return relay.Function([x], relay.expand_dims(x, axis=1, num_newaxis=2)) diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index fa3935f6e4..2ce59f4139 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -135,54 +135,3 @@ def pp_cell(cell: FCell): return f'{cell.primitive.name} = prim std_{op}({bitwidth});' if cell.is_relay_function(): return f'{cell.relay_function.name} = {cell.relay_function.component_name};' assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' - - -# Dahlia Pretty Printing. - -def next_character(ch, dir=1): - """ - Returns the next character after 'ch'. - If `dir` is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. - """ - return chr(ord(ch) + 1) if dir > 0 else chr(ord(ch) - 1) - - -def pp_dahlia_memory_declarations(declaration_list): - declarations = [] - for declaration in declaration_list: - string = f'decl {declaration.name}: {declaration.data_type}<{declaration.data[0]}>' - for i in range(0, declaration.type): string += f'[{declaration.data[i + 1]}]' - declarations.append(string + ";") - return '\n'.join(declarations) - - -def pp_dahlia_loop(data, body): - """ - Returns an iteration over data with `body` as the work done within the nested loop(s). 
- Many tensor functions share the same control flow: (1) Iterate over `data`, and (2) do some work in body. - For example, if `data` is a 2D primitive of size (M, N) and body == `X;`, then this will return: - - ``` - for (let i: ubit = 0..M) { - for (let j: ubit = 0..N) { - X; - } - } - ``` - """ - variable_name = chr(ord('i')) - num_dimensions = data.type - - program = [] - SPACING = '' - for i in range(0, num_dimensions): - size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] - program.append(f'{SPACING}for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') - variable_name = next_character(variable_name) - SPACING += ' ' - program.append(f'{SPACING}{body}') - - for i in range(0, num_dimensions): - SPACING = SPACING[:-2] - program.append(f'{SPACING}}}') - return '\n'.join(program) diff --git a/frontends/relay-futil/tests/add.expect b/frontends/relay-futil/tests/add.expect index f239d18b42..8c08e35f31 100644 --- a/frontends/relay-futil/tests/add.expect +++ b/frontends/relay-futil/tests/add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component add0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(1); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x.addr0 = add0.x0_addr0; - 
add0.x0_read_data = x.read_data; - y.addr0 = add0.y0_addr0; - add0.y0_read_data = y.read_data; - z.addr0 = add0.z0_addr0; - z.write_data = add0.z0_write_data; - z.write_en = add0.z0_write_en; - add0.z0_done = z.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x.addr0 = comp_add0.x0_addr0; + comp_add0.x0_read_data = x.read_data; + y.addr0 = comp_add0.y0_addr0; + comp_add0.y0_read_data = y.read_data; + z.addr0 = comp_add0.z0_addr0; + z.write_data = comp_add0.z0_write_data; + z.write_en = comp_add0.z0_write_en; + comp_add0.z0_done = z.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/batch_flatten.expect b/frontends/relay-futil/tests/batch_flatten.expect index 6927e4ad85..c1d01a7bae 100644 --- a/frontends/relay-futil/tests/batch_flatten.expect +++ b/frontends/relay-futil/tests/batch_flatten.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component batch_flatten(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component batch_flatten0(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { cells { add0 = prim std_add(3); add1 = prim std_add(2); @@ -139,26 +139,26 @@ component main () -> () { cells { x1 = prim std_mem_d2(32, 1, 4, 1, 3); x = prim std_mem_d3(32, 1, 2, 2, 1, 2, 2); - batch_flatten0 = batch_flatten; + comp_batch_flatten0 = batch_flatten0; } wires { - group run_batch_flatten { - x.addr0 = batch_flatten0.x0_0_0_addr0; - 
batch_flatten0.x0_0_0_read_data = x.read_data; - x.addr1 = batch_flatten0.x0_0_0_addr1; - x.addr2 = batch_flatten0.x0_0_0_addr2; - x1.addr0 = batch_flatten0.x10_0_addr0; - x1.addr1 = batch_flatten0.x10_0_addr1; - x1.write_data = batch_flatten0.x10_0_write_data; - x1.write_en = batch_flatten0.x10_0_write_en; - batch_flatten0.x10_0_done = x1.done; - batch_flatten0.go = 1'd1; - run_batch_flatten[done] = batch_flatten0.done ? 1'd1; + group run_batch_flatten0 { + x.addr0 = comp_batch_flatten0.x0_0_0_addr0; + comp_batch_flatten0.x0_0_0_read_data = x.read_data; + x.addr1 = comp_batch_flatten0.x0_0_0_addr1; + x.addr2 = comp_batch_flatten0.x0_0_0_addr2; + x1.addr0 = comp_batch_flatten0.x10_0_addr0; + x1.addr1 = comp_batch_flatten0.x10_0_addr1; + x1.write_data = comp_batch_flatten0.x10_0_write_data; + x1.write_en = comp_batch_flatten0.x10_0_write_en; + comp_batch_flatten0.x10_0_done = x1.done; + comp_batch_flatten0.go = 1'd1; + run_batch_flatten0[done] = comp_batch_flatten0.done ? 1'd1; } } control { seq { - run_batch_flatten; + run_batch_flatten0; } } } diff --git a/frontends/relay-futil/tests/batch_matmul.expect b/frontends/relay-futil/tests/batch_matmul.expect index 0bf73d4754..93a95d5712 100644 --- a/frontends/relay-futil/tests/batch_matmul.expect +++ b/frontends/relay-futil/tests/batch_matmul.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 3, a0_0_0_addr1: 3, a0_0_0_addr2: 3, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 3, b0_0_0_addr1: 3, b0_0_0_addr2: 3, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { +component batch_matmul0(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, 
x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 3, a0_0_0_addr1: 3, a0_0_0_addr2: 3, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 3, b0_0_0_addr1: 3, b0_0_0_addr2: 3, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(3); @@ -402,31 +402,31 @@ component main () -> () { x = prim std_mem_d3(32, 4, 7, 7, 3, 3, 3); a = prim std_mem_d3(32, 4, 7, 5, 3, 3, 3); b = prim std_mem_d3(32, 4, 7, 5, 3, 3, 3); - batch_matmul0 = batch_matmul; + comp_batch_matmul0 = batch_matmul0; } wires { - group run_batch_matmul { - a.addr0 = batch_matmul0.a0_0_0_addr0; - batch_matmul0.a0_0_0_read_data = a.read_data; - a.addr1 = batch_matmul0.a0_0_0_addr1; - a.addr2 = batch_matmul0.a0_0_0_addr2; - b.addr0 = batch_matmul0.b0_0_0_addr0; - batch_matmul0.b0_0_0_read_data = b.read_data; - b.addr1 = batch_matmul0.b0_0_0_addr1; - b.addr2 = batch_matmul0.b0_0_0_addr2; - x.addr0 = batch_matmul0.x0_0_0_addr0; - x.addr1 = batch_matmul0.x0_0_0_addr1; - x.addr2 = batch_matmul0.x0_0_0_addr2; - x.write_data = batch_matmul0.x0_0_0_write_data; - x.write_en = batch_matmul0.x0_0_0_write_en; - batch_matmul0.x0_0_0_done = x.done; - batch_matmul0.go = 1'd1; - run_batch_matmul[done] = batch_matmul0.done ? 
1'd1; + group run_batch_matmul0 { + a.addr0 = comp_batch_matmul0.a0_0_0_addr0; + comp_batch_matmul0.a0_0_0_read_data = a.read_data; + a.addr1 = comp_batch_matmul0.a0_0_0_addr1; + a.addr2 = comp_batch_matmul0.a0_0_0_addr2; + b.addr0 = comp_batch_matmul0.b0_0_0_addr0; + comp_batch_matmul0.b0_0_0_read_data = b.read_data; + b.addr1 = comp_batch_matmul0.b0_0_0_addr1; + b.addr2 = comp_batch_matmul0.b0_0_0_addr2; + x.addr0 = comp_batch_matmul0.x0_0_0_addr0; + x.addr1 = comp_batch_matmul0.x0_0_0_addr1; + x.addr2 = comp_batch_matmul0.x0_0_0_addr2; + x.write_data = comp_batch_matmul0.x0_0_0_write_data; + x.write_en = comp_batch_matmul0.x0_0_0_write_en; + comp_batch_matmul0.x0_0_0_done = x.done; + comp_batch_matmul0.go = 1'd1; + run_batch_matmul0[done] = comp_batch_matmul0.done ? 1'd1; } } control { seq { - run_batch_matmul; + run_batch_matmul0; } } } diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect index c181b95bbf..18ba0a8d0e 100644 --- a/frontends/relay-futil/tests/bias_add.expect +++ b/frontends/relay-futil/tests/bias_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_0_0_addr0: 1, x0_0_0_0_addr1: 7, x0_0_0_0_addr2: 10, x0_0_0_0_addr3: 9, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 1, x10_0_0_0_addr1: 7, x10_0_0_0_addr2: 10, x10_0_0_0_addr3: 9, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { +component bias_add0(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_0_0_addr0: 1, x0_0_0_0_addr1: 7, x0_0_0_0_addr2: 10, x0_0_0_0_addr3: 9, 
x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 1, x10_0_0_0_addr1: 7, x10_0_0_0_addr2: 10, x10_0_0_0_addr3: 9, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(9); @@ -167,31 +167,31 @@ component main () -> () { x1 = prim std_mem_d4(32, 1, 64, 512, 256, 1, 7, 10, 9); x = prim std_mem_d4(32, 1, 64, 512, 256, 1, 7, 10, 9); bias = prim std_mem_d1(32, 64, 7); - bias_add0 = bias_add; + comp_bias_add0 = bias_add0; } wires { - group run_bias_add { - x.addr0 = bias_add0.x0_0_0_0_addr0; - bias_add0.x0_0_0_0_read_data = x.read_data; - x.addr1 = bias_add0.x0_0_0_0_addr1; - x.addr2 = bias_add0.x0_0_0_0_addr2; - x.addr3 = bias_add0.x0_0_0_0_addr3; - bias.addr0 = bias_add0.bias0_addr0; - bias_add0.bias0_read_data = bias.read_data; - x1.addr0 = bias_add0.x10_0_0_0_addr0; - x1.addr1 = bias_add0.x10_0_0_0_addr1; - x1.addr2 = bias_add0.x10_0_0_0_addr2; - x1.addr3 = bias_add0.x10_0_0_0_addr3; - x1.write_data = bias_add0.x10_0_0_0_write_data; - x1.write_en = bias_add0.x10_0_0_0_write_en; - bias_add0.x10_0_0_0_done = x1.done; - bias_add0.go = 1'd1; - run_bias_add[done] = bias_add0.done ? 1'd1; + group run_bias_add0 { + x.addr0 = comp_bias_add0.x0_0_0_0_addr0; + comp_bias_add0.x0_0_0_0_read_data = x.read_data; + x.addr1 = comp_bias_add0.x0_0_0_0_addr1; + x.addr2 = comp_bias_add0.x0_0_0_0_addr2; + x.addr3 = comp_bias_add0.x0_0_0_0_addr3; + bias.addr0 = comp_bias_add0.bias0_addr0; + comp_bias_add0.bias0_read_data = bias.read_data; + x1.addr0 = comp_bias_add0.x10_0_0_0_addr0; + x1.addr1 = comp_bias_add0.x10_0_0_0_addr1; + x1.addr2 = comp_bias_add0.x10_0_0_0_addr2; + x1.addr3 = comp_bias_add0.x10_0_0_0_addr3; + x1.write_data = comp_bias_add0.x10_0_0_0_write_data; + x1.write_en = comp_bias_add0.x10_0_0_0_write_en; + comp_bias_add0.x10_0_0_0_done = x1.done; + comp_bias_add0.go = 1'd1; + run_bias_add0[done] = comp_bias_add0.done ? 
1'd1; } } control { seq { - run_bias_add; + run_bias_add0; } } } diff --git a/frontends/relay-futil/tests/broadcast.expect b/frontends/relay-futil/tests/broadcast.expect index 84f5962b54..5eb74f0ca1 100644 --- a/frontends/relay-futil/tests/broadcast.expect +++ b/frontends/relay-futil/tests/broadcast.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_addr0: 2, x10_0_addr1: 2, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) { +component add0(go: 1, clk: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_addr0: 2, x10_0_addr1: 2, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(2); @@ -139,30 +139,30 @@ component main () -> () { x3 = prim std_mem_d3(32, 2, 2, 2, 2, 2, 2); x1 = prim std_mem_d2(32, 2, 2, 2, 2); x2 = prim std_mem_d3(32, 2, 1, 1, 2, 1, 1); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x1.addr0 = add0.x10_0_addr0; - add0.x10_0_read_data = x1.read_data; - x1.addr1 = add0.x10_0_addr1; - x2.addr0 = add0.x20_0_0_addr0; - add0.x20_0_0_read_data = x2.read_data; - x2.addr1 = add0.x20_0_0_addr1; - x2.addr2 = add0.x20_0_0_addr2; - x3.addr0 = add0.x30_0_0_addr0; - x3.addr1 = add0.x30_0_0_addr1; - x3.addr2 = add0.x30_0_0_addr2; - x3.write_data = add0.x30_0_0_write_data; - x3.write_en 
= add0.x30_0_0_write_en; - add0.x30_0_0_done = x3.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x1.addr0 = comp_add0.x10_0_addr0; + comp_add0.x10_0_read_data = x1.read_data; + x1.addr1 = comp_add0.x10_0_addr1; + x2.addr0 = comp_add0.x20_0_0_addr0; + comp_add0.x20_0_0_read_data = x2.read_data; + x2.addr1 = comp_add0.x20_0_0_addr1; + x2.addr2 = comp_add0.x20_0_0_addr2; + x3.addr0 = comp_add0.x30_0_0_addr0; + x3.addr1 = comp_add0.x30_0_0_addr1; + x3.addr2 = comp_add0.x30_0_0_addr2; + x3.write_data = comp_add0.x30_0_0_write_data; + x3.write_en = comp_add0.x30_0_0_write_en; + comp_add0.x30_0_0_done = x3.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/conv2d.expect b/frontends/relay-futil/tests/conv2d.expect index 1d2163c61e..aa5ca04744 100644 --- a/frontends/relay-futil/tests/conv2d.expect +++ b/frontends/relay-futil/tests/conv2d.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component conv2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, weight0_0_0_0_read_data: 32, weight0_0_0_0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 3, data0_0_0_0_addr1: 10, data0_0_0_0_addr2: 4, data0_0_0_0_addr3: 4, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, weight0_0_0_0_addr0: 10, weight0_0_0_0_addr1: 10, weight0_0_0_0_addr2: 2, weight0_0_0_0_addr3: 2, weight0_0_0_0_write_data: 32, weight0_0_0_0_write_en: 1, weight0_0_0_0_clk: 1, x0_0_0_0_addr0: 3, x0_0_0_0_addr1: 10, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 4, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1) { +component conv2d0(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, weight0_0_0_0_read_data: 32, weight0_0_0_0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 3, data0_0_0_0_addr1: 10, data0_0_0_0_addr2: 4, data0_0_0_0_addr3: 4, 
data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, weight0_0_0_0_addr0: 10, weight0_0_0_0_addr1: 10, weight0_0_0_0_addr2: 2, weight0_0_0_0_addr3: 2, weight0_0_0_0_write_data: 32, weight0_0_0_0_write_en: 1, weight0_0_0_0_clk: 1, x0_0_0_0_addr0: 3, x0_0_0_0_addr1: 10, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 4, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(32); @@ -362,34 +362,34 @@ component main () -> () { x = prim std_mem_d4(32, 5, 512, 14, 14, 3, 10, 4, 4); data = prim std_mem_d4(32, 5, 512, 14, 14, 3, 10, 4, 4); weight = prim std_mem_d4(32, 512, 512, 3, 3, 10, 10, 2, 2); - conv2d0 = conv2d; + comp_conv2d0 = conv2d0; } wires { - group run_conv2d { - data.addr0 = conv2d0.data0_0_0_0_addr0; - conv2d0.data0_0_0_0_read_data = data.read_data; - data.addr1 = conv2d0.data0_0_0_0_addr1; - data.addr2 = conv2d0.data0_0_0_0_addr2; - data.addr3 = conv2d0.data0_0_0_0_addr3; - weight.addr0 = conv2d0.weight0_0_0_0_addr0; - conv2d0.weight0_0_0_0_read_data = weight.read_data; - weight.addr1 = conv2d0.weight0_0_0_0_addr1; - weight.addr2 = conv2d0.weight0_0_0_0_addr2; - weight.addr3 = conv2d0.weight0_0_0_0_addr3; - x.addr0 = conv2d0.x0_0_0_0_addr0; - x.addr1 = conv2d0.x0_0_0_0_addr1; - x.addr2 = conv2d0.x0_0_0_0_addr2; - x.addr3 = conv2d0.x0_0_0_0_addr3; - x.write_data = conv2d0.x0_0_0_0_write_data; - x.write_en = conv2d0.x0_0_0_0_write_en; - conv2d0.x0_0_0_0_done = x.done; - conv2d0.go = 1'd1; - run_conv2d[done] = conv2d0.done ? 
1'd1; + group run_conv2d0 { + data.addr0 = comp_conv2d0.data0_0_0_0_addr0; + comp_conv2d0.data0_0_0_0_read_data = data.read_data; + data.addr1 = comp_conv2d0.data0_0_0_0_addr1; + data.addr2 = comp_conv2d0.data0_0_0_0_addr2; + data.addr3 = comp_conv2d0.data0_0_0_0_addr3; + weight.addr0 = comp_conv2d0.weight0_0_0_0_addr0; + comp_conv2d0.weight0_0_0_0_read_data = weight.read_data; + weight.addr1 = comp_conv2d0.weight0_0_0_0_addr1; + weight.addr2 = comp_conv2d0.weight0_0_0_0_addr2; + weight.addr3 = comp_conv2d0.weight0_0_0_0_addr3; + x.addr0 = comp_conv2d0.x0_0_0_0_addr0; + x.addr1 = comp_conv2d0.x0_0_0_0_addr1; + x.addr2 = comp_conv2d0.x0_0_0_0_addr2; + x.addr3 = comp_conv2d0.x0_0_0_0_addr3; + x.write_data = comp_conv2d0.x0_0_0_0_write_data; + x.write_en = comp_conv2d0.x0_0_0_0_write_en; + comp_conv2d0.x0_0_0_0_done = x.done; + comp_conv2d0.go = 1'd1; + run_conv2d0[done] = comp_conv2d0.done ? 1'd1; } } control { seq { - run_conv2d; + run_conv2d0; } } } diff --git a/frontends/relay-futil/tests/dense.expect b/frontends/relay-futil/tests/dense.expect index a0d5ead2b1..9ca0f57adb 100644 --- a/frontends/relay-futil/tests/dense.expect +++ b/frontends/relay-futil/tests/dense.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component dense(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 13, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 4, y0_0_addr1: 13, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { +component dense0(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 13, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 4, y0_0_addr1: 13, 
y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { cells { add0 = prim std_add(13); add1 = prim std_add(4); @@ -307,28 +307,28 @@ component main () -> () { x1 = prim std_mem_d2(32, 1, 10, 1, 4); x = prim std_mem_d2(32, 1, 4096, 1, 13); y = prim std_mem_d2(32, 10, 4096, 4, 13); - dense0 = dense; + comp_dense0 = dense0; } wires { - group run_dense { - x.addr0 = dense0.x0_0_addr0; - dense0.x0_0_read_data = x.read_data; - x.addr1 = dense0.x0_0_addr1; - y.addr0 = dense0.y0_0_addr0; - dense0.y0_0_read_data = y.read_data; - y.addr1 = dense0.y0_0_addr1; - x1.addr0 = dense0.x10_0_addr0; - x1.addr1 = dense0.x10_0_addr1; - x1.write_data = dense0.x10_0_write_data; - x1.write_en = dense0.x10_0_write_en; - dense0.x10_0_done = x1.done; - dense0.go = 1'd1; - run_dense[done] = dense0.done ? 1'd1; + group run_dense0 { + x.addr0 = comp_dense0.x0_0_addr0; + comp_dense0.x0_0_read_data = x.read_data; + x.addr1 = comp_dense0.x0_0_addr1; + y.addr0 = comp_dense0.y0_0_addr0; + comp_dense0.y0_0_read_data = y.read_data; + y.addr1 = comp_dense0.y0_0_addr1; + x1.addr0 = comp_dense0.x10_0_addr0; + x1.addr1 = comp_dense0.x10_0_addr1; + x1.write_data = comp_dense0.x10_0_write_data; + x1.write_en = comp_dense0.x10_0_write_en; + comp_dense0.x10_0_done = x1.done; + comp_dense0.go = 1'd1; + run_dense0[done] = comp_dense0.done ? 
1'd1; } } control { seq { - run_dense; + run_dense0; } } } diff --git a/frontends/relay-futil/tests/fixed_point_add.expect b/frontends/relay-futil/tests/fixed_point_add.expect index aa8240b4cf..9c4910177e 100644 --- a/frontends/relay-futil/tests/fixed_point_add.expect +++ b/frontends/relay-futil/tests/fixed_point_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component add0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(1); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x.addr0 = add0.x0_addr0; - add0.x0_read_data = x.read_data; - y.addr0 = add0.y0_addr0; - add0.y0_read_data = y.read_data; - z.addr0 = add0.z0_addr0; - z.write_data = add0.z0_write_data; - z.write_en = add0.z0_write_en; - add0.z0_done = z.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x.addr0 = comp_add0.x0_addr0; + comp_add0.x0_read_data = x.read_data; + y.addr0 = comp_add0.y0_addr0; + comp_add0.y0_read_data = y.read_data; + z.addr0 = comp_add0.z0_addr0; + z.write_data = comp_add0.z0_write_data; + z.write_en = comp_add0.z0_write_en; + comp_add0.z0_done = z.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 
1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/let1.expect b/frontends/relay-futil/tests/let1.expect index cf228003ae..e59cbebcd9 100644 --- a/frontends/relay-futil/tests/let1.expect +++ b/frontends/relay-futil/tests/let1.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component multiply0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -82,25 +82,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); b = prim std_mem_d1(32, 1, 1); - multiply0 = multiply; + comp_multiply0 = multiply0; } wires { - group run_multiply { - a.addr0 = multiply0.a0_addr0; - multiply0.a0_read_data = a.read_data; - b.addr0 = multiply0.b0_addr0; - multiply0.b0_read_data = b.read_data; - z.addr0 = multiply0.z0_addr0; - z.write_data = multiply0.z0_write_data; - z.write_en = multiply0.z0_write_en; - multiply0.z0_done = z.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 
1'd1; + group run_multiply0 { + a.addr0 = comp_multiply0.a0_addr0; + comp_multiply0.a0_read_data = a.read_data; + b.addr0 = comp_multiply0.b0_addr0; + comp_multiply0.b0_read_data = b.read_data; + z.addr0 = comp_multiply0.z0_addr0; + z.write_data = comp_multiply0.z0_write_data; + z.write_en = comp_multiply0.z0_write_en; + comp_multiply0.z0_done = z.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; } } control { seq { - run_multiply; + run_multiply0; } } } diff --git a/frontends/relay-futil/tests/let2.expect b/frontends/relay-futil/tests/let2.expect index c4b8afc3cb..451a17e8df 100644 --- a/frontends/relay-futil/tests/let2.expect +++ b/frontends/relay-futil/tests/let2.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { +component add0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(32); @@ -69,7 +69,7 @@ component add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_ } } -component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { +component multiply0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> 
(done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -151,40 +151,40 @@ component main () -> () { d = prim std_mem_d1(32, 1, 1); c = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); - add0 = add; + comp_add0 = add0; b = prim std_mem_d1(32, 1, 1); - multiply0 = multiply; + comp_multiply0 = multiply0; } wires { - group run_multiply { - a.addr0 = multiply0.a0_addr0; - multiply0.a0_read_data = a.read_data; - b.addr0 = multiply0.b0_addr0; - multiply0.b0_read_data = b.read_data; - c.addr0 = multiply0.c0_addr0; - c.write_data = multiply0.c0_write_data; - c.write_en = multiply0.c0_write_en; - multiply0.c0_done = c.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 1'd1; - } - group run_add { - c.addr0 = add0.c0_addr0; - add0.c0_read_data = c.read_data; - a.addr0 = add0.a0_addr0; - add0.a0_read_data = a.read_data; - d.addr0 = add0.d0_addr0; - d.write_data = add0.d0_write_data; - d.write_en = add0.d0_write_en; - add0.d0_done = d.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_multiply0 { + a.addr0 = comp_multiply0.a0_addr0; + comp_multiply0.a0_read_data = a.read_data; + b.addr0 = comp_multiply0.b0_addr0; + comp_multiply0.b0_read_data = b.read_data; + c.addr0 = comp_multiply0.c0_addr0; + c.write_data = comp_multiply0.c0_write_data; + c.write_en = comp_multiply0.c0_write_en; + comp_multiply0.c0_done = c.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 
1'd1; + } + group run_add0 { + c.addr0 = comp_add0.c0_addr0; + comp_add0.c0_read_data = c.read_data; + a.addr0 = comp_add0.a0_addr0; + comp_add0.a0_read_data = a.read_data; + d.addr0 = comp_add0.d0_addr0; + d.write_data = comp_add0.d0_write_data; + d.write_en = comp_add0.d0_write_en; + comp_add0.d0_done = d.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_multiply; - run_add; + run_multiply0; + run_add0; } } } diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index cbb0783fa8..222268b304 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { +component multiply0(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { cells { add0 = prim std_add(1); bin_read0_0 = prim std_reg(32); @@ -77,18 +77,17 @@ component multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32 } } -component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { +component subtract1(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 
1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); - bin_read0_0 = prim std_reg(32); c_read0_0 = prim std_reg(32); const0 = prim std_const(1, 0); const1 = prim std_const(1, 0); const2 = prim std_const(1, 1); - div_pipe0 = prim std_div_pipe(32); i0 = prim std_reg(1); le0 = prim std_le(1); + sub0 = prim std_sub(32); } wires { group cond0<"static"=0> { @@ -101,14 +100,6 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, i0.write_en = 1'd1; let0[done] = i0.done; } - group let1 { - bin_read0_0.in = div_pipe0.out; - bin_read0_0.write_en = div_pipe0.done; - let1[done] = bin_read0_0.done; - div_pipe0.left = c_read0_0.out; - div_pipe0.right = a_read0_0.out; - div_pipe0.go = !div_pipe0.done ? 1'd1; - } group upd0<"static"=1> { c_read0_0.write_en = 1'd1; c0_addr0 = i0.out; @@ -124,7 +115,9 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, group upd2<"static"=1> { d0_addr0 = i0.out; d0_write_en = 1'd1; - d0_write_data = 1'd1 ? bin_read0_0.out; + sub0.left = c_read0_0.out; + sub0.right = a_read0_0.out; + d0_write_data = 1'd1 ? sub0.out; upd2[done] = d0_done ? 
1'd1; } group upd3<"static"=1> { @@ -145,7 +138,6 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, upd0; upd1; } - let1; upd2; upd3; } @@ -154,7 +146,7 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, } } -component subtract(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { +component subtract0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -228,55 +220,55 @@ component main () -> () { e = prim std_mem_d1(32, 1, 1); c = prim std_mem_d1(32, 1, 1); d = prim std_mem_d1(32, 1, 1); - multiply0 = multiply; + comp_multiply0 = multiply0; a = prim std_mem_d1(32, 1, 1); - divide0 = divide; + comp_subtract1 = subtract1; b = prim std_mem_d1(32, 1, 1); - subtract0 = subtract; + comp_subtract0 = subtract0; } wires { - group run_subtract { - a.addr0 = subtract0.a0_addr0; - subtract0.a0_read_data = a.read_data; - b.addr0 = subtract0.b0_addr0; - subtract0.b0_read_data = b.read_data; - c.addr0 = subtract0.c0_addr0; - c.write_data = subtract0.c0_write_data; - c.write_en = subtract0.c0_write_en; - subtract0.c0_done = c.done; - subtract0.go = 1'd1; - run_subtract[done] = subtract0.done ? 
1'd1; + group run_subtract0 { + a.addr0 = comp_subtract0.a0_addr0; + comp_subtract0.a0_read_data = a.read_data; + b.addr0 = comp_subtract0.b0_addr0; + comp_subtract0.b0_read_data = b.read_data; + c.addr0 = comp_subtract0.c0_addr0; + c.write_data = comp_subtract0.c0_write_data; + c.write_en = comp_subtract0.c0_write_en; + comp_subtract0.c0_done = c.done; + comp_subtract0.go = 1'd1; + run_subtract0[done] = comp_subtract0.done ? 1'd1; } - group run_divide { - c.addr0 = divide0.c0_addr0; - divide0.c0_read_data = c.read_data; - a.addr0 = divide0.a0_addr0; - divide0.a0_read_data = a.read_data; - d.addr0 = divide0.d0_addr0; - d.write_data = divide0.d0_write_data; - d.write_en = divide0.d0_write_en; - divide0.d0_done = d.done; - divide0.go = 1'd1; - run_divide[done] = divide0.done ? 1'd1; + group run_subtract1 { + c.addr0 = comp_subtract1.c0_addr0; + comp_subtract1.c0_read_data = c.read_data; + a.addr0 = comp_subtract1.a0_addr0; + comp_subtract1.a0_read_data = a.read_data; + d.addr0 = comp_subtract1.d0_addr0; + d.write_data = comp_subtract1.d0_write_data; + d.write_en = comp_subtract1.d0_write_en; + comp_subtract1.d0_done = d.done; + comp_subtract1.go = 1'd1; + run_subtract1[done] = comp_subtract1.done ? 1'd1; } - group run_multiply { - c.addr0 = multiply0.c0_addr0; - multiply0.c0_read_data = c.read_data; - d.addr0 = multiply0.d0_addr0; - multiply0.d0_read_data = d.read_data; - e.addr0 = multiply0.e0_addr0; - e.write_data = multiply0.e0_write_data; - e.write_en = multiply0.e0_write_en; - multiply0.e0_done = e.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 
1'd1; + group run_multiply0 { + c.addr0 = comp_multiply0.c0_addr0; + comp_multiply0.c0_read_data = c.read_data; + d.addr0 = comp_multiply0.d0_addr0; + comp_multiply0.d0_read_data = d.read_data; + e.addr0 = comp_multiply0.e0_addr0; + e.write_data = comp_multiply0.e0_write_data; + e.write_en = comp_multiply0.e0_write_en; + comp_multiply0.e0_done = e.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; } } control { seq { - run_subtract; - run_divide; - run_multiply; + run_subtract0; + run_subtract1; + run_multiply0; } } } diff --git a/frontends/relay-futil/tests/let3.relay b/frontends/relay-futil/tests/let3.relay index 50aa9a8064..725e75ab94 100644 --- a/frontends/relay-futil/tests/let3.relay +++ b/frontends/relay-futil/tests/let3.relay @@ -1,7 +1,7 @@ v0.0.4 fn (%a: int32, %b: int32) { let %c = subtract(%a, %b); - let %d = divide(%c, %a); + let %d = subtract(%c, %a); let %e = multiply(%c, %d); %e } diff --git a/frontends/relay-futil/tests/max_pool2d.expect b/frontends/relay-futil/tests/max_pool2d.expect index 47decb83ba..ee3eb04a8b 100644 --- a/frontends/relay-futil/tests/max_pool2d.expect +++ b/frontends/relay-futil/tests/max_pool2d.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component max_pool2d(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, result0_0_0_0_read_data: 32, result0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 2, data0_0_0_0_addr1: 2, data0_0_0_0_addr2: 3, data0_0_0_0_addr3: 3, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, result0_0_0_0_addr0: 2, result0_0_0_0_addr1: 2, result0_0_0_0_addr2: 2, result0_0_0_0_addr3: 2, result0_0_0_0_write_data: 32, result0_0_0_0_write_en: 1, result0_0_0_0_clk: 1) { +component max_pool2d0(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, result0_0_0_0_read_data: 32, result0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 2, data0_0_0_0_addr1: 2, data0_0_0_0_addr2: 3, data0_0_0_0_addr3: 3, data0_0_0_0_write_data: 32, 
data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, result0_0_0_0_addr0: 2, result0_0_0_0_addr1: 2, result0_0_0_0_addr2: 2, result0_0_0_0_addr3: 2, result0_0_0_0_write_data: 32, result0_0_0_0_write_en: 1, result0_0_0_0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(32); @@ -326,29 +326,29 @@ component main () -> () { cells { result = prim std_mem_d4(32, 2, 2, 2, 2, 2, 2, 2, 2); data = prim std_mem_d4(32, 2, 2, 4, 4, 2, 2, 3, 3); - max_pool2d0 = max_pool2d; + comp_max_pool2d0 = max_pool2d0; } wires { - group run_max_pool2d { - data.addr0 = max_pool2d0.data0_0_0_0_addr0; - max_pool2d0.data0_0_0_0_read_data = data.read_data; - data.addr1 = max_pool2d0.data0_0_0_0_addr1; - data.addr2 = max_pool2d0.data0_0_0_0_addr2; - data.addr3 = max_pool2d0.data0_0_0_0_addr3; - result.addr0 = max_pool2d0.result0_0_0_0_addr0; - result.addr1 = max_pool2d0.result0_0_0_0_addr1; - result.addr2 = max_pool2d0.result0_0_0_0_addr2; - result.addr3 = max_pool2d0.result0_0_0_0_addr3; - result.write_data = max_pool2d0.result0_0_0_0_write_data; - result.write_en = max_pool2d0.result0_0_0_0_write_en; - max_pool2d0.result0_0_0_0_done = result.done; - max_pool2d0.go = 1'd1; - run_max_pool2d[done] = max_pool2d0.done ? 1'd1; + group run_max_pool2d0 { + data.addr0 = comp_max_pool2d0.data0_0_0_0_addr0; + comp_max_pool2d0.data0_0_0_0_read_data = data.read_data; + data.addr1 = comp_max_pool2d0.data0_0_0_0_addr1; + data.addr2 = comp_max_pool2d0.data0_0_0_0_addr2; + data.addr3 = comp_max_pool2d0.data0_0_0_0_addr3; + result.addr0 = comp_max_pool2d0.result0_0_0_0_addr0; + result.addr1 = comp_max_pool2d0.result0_0_0_0_addr1; + result.addr2 = comp_max_pool2d0.result0_0_0_0_addr2; + result.addr3 = comp_max_pool2d0.result0_0_0_0_addr3; + result.write_data = comp_max_pool2d0.result0_0_0_0_write_data; + result.write_en = comp_max_pool2d0.result0_0_0_0_write_en; + comp_max_pool2d0.result0_0_0_0_done = result.done; + comp_max_pool2d0.go = 1'd1; + run_max_pool2d0[done] = comp_max_pool2d0.done ? 
1'd1; } } control { seq { - run_max_pool2d; + run_max_pool2d0; } } } diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect index 7a65c37f5a..66f3dd53e8 100644 --- a/frontends/relay-futil/tests/relu.expect +++ b/frontends/relay-futil/tests/relu.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 2, x0_0_0_0_addr1: 3, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 6, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 2, x10_0_0_0_addr1: 3, x10_0_0_0_addr2: 4, x10_0_0_0_addr3: 6, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { +component relu0(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 2, x0_0_0_0_addr1: 3, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 6, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 2, x10_0_0_0_addr1: 3, x10_0_0_0_addr2: 4, x10_0_0_0_addr3: 6, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim std_add(6); add1 = prim std_add(4); @@ -193,29 +193,29 @@ component main () -> () { cells { x1 = prim std_mem_d4(32, 2, 4, 8, 32, 2, 3, 4, 6); x = prim std_mem_d4(32, 2, 4, 8, 32, 2, 3, 4, 6); - relu0 = relu; + comp_relu0 = relu0; } wires { - group run_relu { - x.addr0 = relu0.x0_0_0_0_addr0; - relu0.x0_0_0_0_read_data = x.read_data; - x.addr1 = relu0.x0_0_0_0_addr1; - x.addr2 = relu0.x0_0_0_0_addr2; - x.addr3 = relu0.x0_0_0_0_addr3; - x1.addr0 = relu0.x10_0_0_0_addr0; - x1.addr1 = relu0.x10_0_0_0_addr1; - x1.addr2 = relu0.x10_0_0_0_addr2; - x1.addr3 = relu0.x10_0_0_0_addr3; - x1.write_data = relu0.x10_0_0_0_write_data; - x1.write_en = relu0.x10_0_0_0_write_en; - relu0.x10_0_0_0_done = x1.done; - relu0.go = 1'd1; - run_relu[done] = relu0.done ? 
1'd1; + group run_relu0 { + x.addr0 = comp_relu0.x0_0_0_0_addr0; + comp_relu0.x0_0_0_0_read_data = x.read_data; + x.addr1 = comp_relu0.x0_0_0_0_addr1; + x.addr2 = comp_relu0.x0_0_0_0_addr2; + x.addr3 = comp_relu0.x0_0_0_0_addr3; + x1.addr0 = comp_relu0.x10_0_0_0_addr0; + x1.addr1 = comp_relu0.x10_0_0_0_addr1; + x1.addr2 = comp_relu0.x10_0_0_0_addr2; + x1.addr3 = comp_relu0.x10_0_0_0_addr3; + x1.write_data = comp_relu0.x10_0_0_0_write_data; + x1.write_en = comp_relu0.x10_0_0_0_write_en; + comp_relu0.x10_0_0_0_done = x1.done; + comp_relu0.go = 1'd1; + run_relu0[done] = comp_relu0.done ? 1'd1; } } control { seq { - run_relu; + run_relu0; } } } diff --git a/frontends/relay-futil/tests/softmax.expect b/frontends/relay-futil/tests/softmax.expect index 92246625a7..128b2ebc0d 100644 --- a/frontends/relay-futil/tests/softmax.expect +++ b/frontends/relay-futil/tests/softmax.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component softmax(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 4, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component softmax0(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 4, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(4); @@ -186,25 +186,25 @@ component main () -> () { cells { x1 = prim std_mem_d2(32, 1, 10, 1, 4); x = prim std_mem_d2(32, 1, 10, 1, 4); - softmax0 = softmax; + comp_softmax0 = softmax0; } wires { - group run_softmax { - x.addr0 = softmax0.x0_0_addr0; - softmax0.x0_0_read_data = x.read_data; - x.addr1 = softmax0.x0_0_addr1; - x1.addr0 = softmax0.x10_0_addr0; - x1.addr1 = softmax0.x10_0_addr1; - x1.write_data = 
softmax0.x10_0_write_data; - x1.write_en = softmax0.x10_0_write_en; - softmax0.x10_0_done = x1.done; - softmax0.go = 1'd1; - run_softmax[done] = softmax0.done ? 1'd1; + group run_softmax0 { + x.addr0 = comp_softmax0.x0_0_addr0; + comp_softmax0.x0_0_read_data = x.read_data; + x.addr1 = comp_softmax0.x0_0_addr1; + x1.addr0 = comp_softmax0.x10_0_addr0; + x1.addr1 = comp_softmax0.x10_0_addr1; + x1.write_data = comp_softmax0.x10_0_write_data; + x1.write_en = comp_softmax0.x10_0_write_en; + comp_softmax0.x10_0_done = x1.done; + comp_softmax0.go = 1'd1; + run_softmax0[done] = comp_softmax0.done ? 1'd1; } } control { seq { - run_softmax; + run_softmax0; } } } diff --git a/frontends/relay-futil/tests/sqrt.expect b/frontends/relay-futil/tests/sqrt.expect index 2963943f4f..ab67351192 100644 --- a/frontends/relay-futil/tests/sqrt.expect +++ b/frontends/relay-futil/tests/sqrt.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component sqrt(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 4, x0_0_0_0_addr1: 4, x0_0_0_0_addr2: 5, x0_0_0_0_addr3: 7, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 4, x10_0_0_0_addr1: 4, x10_0_0_0_addr2: 5, x10_0_0_0_addr3: 7, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { +component sqrt0(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 4, x0_0_0_0_addr1: 4, x0_0_0_0_addr2: 5, x0_0_0_0_addr3: 7, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 4, x10_0_0_0_addr1: 4, x10_0_0_0_addr2: 5, x10_0_0_0_addr3: 7, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim std_add(7); add1 = prim std_add(5); @@ -156,29 +156,29 @@ component main () -> () { cells { x1 = prim std_mem_d4(32, 8, 8, 16, 64, 4, 4, 5, 7); x = prim std_mem_d4(32, 8, 8, 16, 64, 4, 4, 5, 7); - sqrt0 = sqrt; + 
comp_sqrt0 = sqrt0; } wires { - group run_sqrt { - x.addr0 = sqrt0.x0_0_0_0_addr0; - sqrt0.x0_0_0_0_read_data = x.read_data; - x.addr1 = sqrt0.x0_0_0_0_addr1; - x.addr2 = sqrt0.x0_0_0_0_addr2; - x.addr3 = sqrt0.x0_0_0_0_addr3; - x1.addr0 = sqrt0.x10_0_0_0_addr0; - x1.addr1 = sqrt0.x10_0_0_0_addr1; - x1.addr2 = sqrt0.x10_0_0_0_addr2; - x1.addr3 = sqrt0.x10_0_0_0_addr3; - x1.write_data = sqrt0.x10_0_0_0_write_data; - x1.write_en = sqrt0.x10_0_0_0_write_en; - sqrt0.x10_0_0_0_done = x1.done; - sqrt0.go = 1'd1; - run_sqrt[done] = sqrt0.done ? 1'd1; + group run_sqrt0 { + x.addr0 = comp_sqrt0.x0_0_0_0_addr0; + comp_sqrt0.x0_0_0_0_read_data = x.read_data; + x.addr1 = comp_sqrt0.x0_0_0_0_addr1; + x.addr2 = comp_sqrt0.x0_0_0_0_addr2; + x.addr3 = comp_sqrt0.x0_0_0_0_addr3; + x1.addr0 = comp_sqrt0.x10_0_0_0_addr0; + x1.addr1 = comp_sqrt0.x10_0_0_0_addr1; + x1.addr2 = comp_sqrt0.x10_0_0_0_addr2; + x1.addr3 = comp_sqrt0.x10_0_0_0_addr3; + x1.write_data = comp_sqrt0.x10_0_0_0_write_data; + x1.write_en = comp_sqrt0.x10_0_0_0_write_en; + comp_sqrt0.x10_0_0_0_done = x1.done; + comp_sqrt0.go = 1'd1; + run_sqrt0[done] = comp_sqrt0.done ? 
1'd1; } } control { seq { - run_sqrt; + run_sqrt0; } } } diff --git a/frontends/relay-futil/tests/sub.expect b/frontends/relay-futil/tests/sub.expect index c74af4fb2e..7b3f88385d 100644 --- a/frontends/relay-futil/tests/sub.expect +++ b/frontends/relay-futil/tests/sub.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component subtract(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component subtract0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim std_add(1); const0 = prim std_const(1, 0); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - subtract0 = subtract; + comp_subtract0 = subtract0; } wires { - group run_subtract { - x.addr0 = subtract0.x0_addr0; - subtract0.x0_read_data = x.read_data; - y.addr0 = subtract0.y0_addr0; - subtract0.y0_read_data = y.read_data; - z.addr0 = subtract0.z0_addr0; - z.write_data = subtract0.z0_write_data; - z.write_en = subtract0.z0_write_en; - subtract0.z0_done = z.done; - subtract0.go = 1'd1; - run_subtract[done] = subtract0.done ? 
1'd1; + group run_subtract0 { + x.addr0 = comp_subtract0.x0_addr0; + comp_subtract0.x0_read_data = x.read_data; + y.addr0 = comp_subtract0.y0_addr0; + comp_subtract0.y0_read_data = y.read_data; + z.addr0 = comp_subtract0.z0_addr0; + z.write_data = comp_subtract0.z0_write_data; + z.write_en = comp_subtract0.z0_write_en; + comp_subtract0.z0_done = z.done; + comp_subtract0.go = 1'd1; + run_subtract0[done] = comp_subtract0.done ? 1'd1; } } control { seq { - run_subtract; + run_subtract0; } } } diff --git a/frontends/relay-futil/tests/tensor1d_mult.expect b/frontends/relay-futil/tests/tensor1d_mult.expect index dac0e76d85..a2b7d5d802 100644 --- a/frontends/relay-futil/tests/tensor1d_mult.expect +++ b/frontends/relay-futil/tests/tensor1d_mult.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component multiply(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { +component multiply0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { cells { add0 = prim std_add(3); bin_read0_0 = prim std_reg(32); @@ -82,25 +82,25 @@ component main () -> () { x1 = prim std_mem_d1(32, 4, 3); x = prim std_mem_d1(32, 4, 3); y = prim std_mem_d1(32, 4, 3); - multiply0 = multiply; + comp_multiply0 = multiply0; } wires { - group run_multiply { - x.addr0 = multiply0.x0_addr0; - multiply0.x0_read_data = x.read_data; - y.addr0 = multiply0.y0_addr0; - multiply0.y0_read_data = y.read_data; - x1.addr0 = multiply0.x10_addr0; - x1.write_data = multiply0.x10_write_data; - x1.write_en = multiply0.x10_write_en; 
- multiply0.x10_done = x1.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 1'd1; + group run_multiply0 { + x.addr0 = comp_multiply0.x0_addr0; + comp_multiply0.x0_read_data = x.read_data; + y.addr0 = comp_multiply0.y0_addr0; + comp_multiply0.y0_read_data = y.read_data; + x1.addr0 = comp_multiply0.x10_addr0; + x1.write_data = comp_multiply0.x10_write_data; + x1.write_en = comp_multiply0.x10_write_en; + comp_multiply0.x10_done = x1.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; } } control { seq { - run_multiply; + run_multiply0; } } } diff --git a/frontends/relay-futil/tests/tensor2d_add.expect b/frontends/relay-futil/tests/tensor2d_add.expect index d289badb27..c6a409b5af 100644 --- a/frontends/relay-futil/tests/tensor2d_add.expect +++ b/frontends/relay-futil/tests/tensor2d_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { +component add0(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(3); @@ -106,28 +106,28 @@ component main () -> () { x1 = prim std_mem_d2(32, 2, 4, 2, 3); x = prim std_mem_d2(32, 2, 4, 2, 3); y = prim std_mem_d2(32, 2, 4, 2, 3); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x.addr0 = 
add0.x0_0_addr0; - add0.x0_0_read_data = x.read_data; - x.addr1 = add0.x0_0_addr1; - y.addr0 = add0.y0_0_addr0; - add0.y0_0_read_data = y.read_data; - y.addr1 = add0.y0_0_addr1; - x1.addr0 = add0.x10_0_addr0; - x1.addr1 = add0.x10_0_addr1; - x1.write_data = add0.x10_0_write_data; - x1.write_en = add0.x10_0_write_en; - add0.x10_0_done = x1.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x.addr0 = comp_add0.x0_0_addr0; + comp_add0.x0_0_read_data = x.read_data; + x.addr1 = comp_add0.x0_0_addr1; + y.addr0 = comp_add0.y0_0_addr0; + comp_add0.y0_0_read_data = y.read_data; + y.addr1 = comp_add0.y0_0_addr1; + x1.addr0 = comp_add0.x10_0_addr0; + x1.addr1 = comp_add0.x10_0_addr1; + x1.write_data = comp_add0.x10_0_write_data; + x1.write_en = comp_add0.x10_0_write_en; + comp_add0.x10_0_done = x1.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/tensor3d_divide.expect b/frontends/relay-futil/tests/tensor3d_divide.expect index a823a0ff96..0476ac946a 100644 --- a/frontends/relay-futil/tests/tensor3d_divide.expect +++ b/frontends/relay-futil/tests/tensor3d_divide.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { +component divide0(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, 
x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { cells { add0 = prim std_add(3); add1 = prim std_add(3); @@ -146,31 +146,31 @@ component main () -> () { x1 = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); x = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); y = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); - divide0 = divide; + comp_divide0 = divide0; } wires { - group run_divide { - x.addr0 = divide0.x0_0_0_addr0; - divide0.x0_0_0_read_data = x.read_data; - x.addr1 = divide0.x0_0_0_addr1; - x.addr2 = divide0.x0_0_0_addr2; - y.addr0 = divide0.y0_0_0_addr0; - divide0.y0_0_0_read_data = y.read_data; - y.addr1 = divide0.y0_0_0_addr1; - y.addr2 = divide0.y0_0_0_addr2; - x1.addr0 = divide0.x10_0_0_addr0; - x1.addr1 = divide0.x10_0_0_addr1; - x1.addr2 = divide0.x10_0_0_addr2; - x1.write_data = divide0.x10_0_0_write_data; - x1.write_en = divide0.x10_0_0_write_en; - divide0.x10_0_0_done = x1.done; - divide0.go = 1'd1; - run_divide[done] = divide0.done ? 1'd1; + group run_divide0 { + x.addr0 = comp_divide0.x0_0_0_addr0; + comp_divide0.x0_0_0_read_data = x.read_data; + x.addr1 = comp_divide0.x0_0_0_addr1; + x.addr2 = comp_divide0.x0_0_0_addr2; + y.addr0 = comp_divide0.y0_0_0_addr0; + comp_divide0.y0_0_0_read_data = y.read_data; + y.addr1 = comp_divide0.y0_0_0_addr1; + y.addr2 = comp_divide0.y0_0_0_addr2; + x1.addr0 = comp_divide0.x10_0_0_addr0; + x1.addr1 = comp_divide0.x10_0_0_addr1; + x1.addr2 = comp_divide0.x10_0_0_addr2; + x1.write_data = comp_divide0.x10_0_0_write_data; + x1.write_en = comp_divide0.x10_0_0_write_en; + comp_divide0.x10_0_0_done = x1.done; + comp_divide0.go = 1'd1; + run_divide0[done] = comp_divide0.done ? 
1'd1; } } control { seq { - run_divide; + run_divide0; } } } diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index 69cc8c4e0a..58fbb1ebc1 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -77,12 +77,12 @@ def get_memory_parameters(type): return data, NumDimensionsToPrimitive[num_dimensions], data_type -def build_main_controls(c: FComponent): +def build_main_controls(component: FComponent): ''' Builds the wires and control for the `main` component. This is done by creating a group `run_*` with its respective wiring for each Relay function call, and adding it to the control. ''' - for cell in reversed(c.cells.values()): + for cell in reversed(component.cells.values()): if not cell.is_relay_function(): continue function = cell.relay_function inputs, output = function.inputs, function.output @@ -115,8 +115,8 @@ def build_main_controls(c: FComponent): wires.append(FWire(f'{function.name}.{output.dahlia_name}_done', f'{output_name}.done')) wires.append(FWire(f'{function.name}.go', "1'd1")) wires.append(FWire(f'{group_name}[done]', f"{function.name}.done ? 1'd1")) - c.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) + component.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) # Ensures that only group names make it into the controls of a FuTIL component. - connections = list(filter(lambda w: w.is_group(), c.wires)) - c.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] + connections = list(filter(lambda w: w.is_group(), component.wires)) + component.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] From 06dff4f5794b19d142bd8475d46b780a3264f107 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Thu, 10 Dec 2020 20:11:08 -0500 Subject: [PATCH 73/75] Fix fud externalize stage. 
--- fud/fud/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fud/fud/main.py b/fud/fud/main.py index 6b78b1db62..5bcbb3d7a3 100644 --- a/fud/fud/main.py +++ b/fud/fud/main.py @@ -40,10 +40,10 @@ def register_stages(registry, cfg): cfg, 'futil-noinline', '-b futil -d hole-inliner', 'Compile FuTIL to FuTIL to remove all control and inline groups' )) - registry.register( - futil.FutilStage(config, 'futil-externalize', '-b futil -p externalize', - 'Compile FuTIL to FuTIL to externalize all external memory primitives')) + futil.FutilStage(cfg, 'futil-externalize', '-b futil -p externalize', + 'Compile FuTIL to FuTIL to externalize all external memory primitives' + )) # Verilator registry.register( From f2290cb84b384f19e6e0d1d926c71dbcb1da7c9b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 20 Dec 2020 08:06:56 -0500 Subject: [PATCH 74/75] Remove primitive library changes. --- primitives/std.lib | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/primitives/std.lib b/primitives/std.lib index 30648aa7d8..2386742b88 100644 --- a/primitives/std.lib +++ b/primitives/std.lib @@ -63,7 +63,7 @@ primitive std_mem_d2[width, d0_size, d1_size, d0_idx_size, d1_idx_size]( parameter d1_idx_size = 4) (input logic [d0_idx_size-1:0] addr0, input logic [d1_idx_size-1:0] addr1, - input logic [width-1:0] write_data /*verilator public*/, + input logic [width-1:0] write_data, input logic write_en, input logic clk, output logic [width-1:0] read_data, @@ -849,7 +849,6 @@ primitive fixed_p_std_const[width, int_width, fract_width, value1, value2] () -> parameter value2 = 0) (output logic [width-1:0] out); - /* verilator lint_off WIDTHCONCAT */ assign out = {value1, value2}; endmodule } From c3018f5dd2e6218a37ca18d5774aab2775ad8597 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Sun, 20 Dec 2020 08:10:12 -0500 Subject: [PATCH 75/75] Mark softmax as unimplemented. 
--- frontends/relay-futil/dahlia_lowering.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontends/relay-futil/dahlia_lowering.py b/frontends/relay-futil/dahlia_lowering.py index 3bb822e681..6b83a28784 100644 --- a/frontends/relay-futil/dahlia_lowering.py +++ b/frontends/relay-futil/dahlia_lowering.py @@ -386,6 +386,7 @@ def dense(function): # TODO(cgyurgyik): Currently, only supports a small subset (namely those used in our VGG net and MLP net examples). def softmax(function): """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.softmax""" + assert False, "Unimplemented." op, res = function.inputs[0].primitive, function.output.primitive axis = function.attributes.get_int("axis") data_type = op.data_type