diff --git a/frontends/relay-futil/compiler.py b/frontends/relay-futil/compiler.py index 86fd5bd3e3..d3eb4461d3 100644 --- a/frontends/relay-futil/compiler.py +++ b/frontends/relay-futil/compiler.py @@ -7,20 +7,7 @@ from pretty_print import * from utilities import * from futil_ast import * -from dahlia_functions import * - -# Mapping from Relay binary calls to the respective Dahlia operator. -BuiltInBinaryOps = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} - -# Mapping from Relay function names to their respective Dahlia lowering. -RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, - 'nn.bias_add': bias_add, 'nn.relu': relu, 'negative': negative, 'expand_dims': expand_dims, - 'sqrt': sqrt} - -# Mapping between primitive type and associated Dahlia name extension. -# E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. -DahliaNameExtension = {PrimitiveType.Memory1D: '0', PrimitiveType.Memory2D: '0_0', - PrimitiveType.Memory3D: '0_0_0', PrimitiveType.Memory4D: '0_0_0_0'} +from dahlia_lowering import * class Relay2Futil(ExprFunctor): @@ -30,17 +17,16 @@ def __init__(self): super(Relay2Futil, self).__init__() self.id_dictionary = defaultdict(int) self.relay_id_dictionary = defaultdict(int) - self.dahlia_components = [] - self.main = FComponent(name="main", cells=[], wires=[]) + self.main = FComponent(name="main") def id(self, name): """ Provides a unique identification for a given name. For example, if 'a' is seen three times, it will produce: 'a0', 'a1', 'a2'. """ - id_number = self.id_dictionary[name] + id_number = str(self.id_dictionary[name]) self.id_dictionary[name] += 1 - return name + str(id_number) + return ''.join((name, id_number)) def relay_id(self, name): """ @@ -48,97 +34,75 @@ def relay_id(self, name): %x = foo(%y); %x1 = bar(%x); // Here, at this level, the name_hint associated with `x1` is still 'x'. - To avoid this, we provide Relay with its own identification dictionary. If 'x' is seen - three times, it will produce: 'x', 'x1', x2'. + To avoid this, we provide Relay with its own identification dictionary. + If 'x' is seen three times, it will produce: 'x', 'x1', x2'. """ id_number = self.relay_id_dictionary[name] self.relay_id_dictionary[name] += 1 if id_number == 0: return name - return name + str(id_number) + return ''.join((name, str(id_number))) def dahlia_name(self, name, type): """ - Dahlia uses the following naming scheme for an arbitrary variable 'X': - Memory1D: 'X0', 'X1', 'X2', ... - Memory2D: 'X0_0', 'X1_0', 'X2_0', ... - Memory3D: 'X0_0_0', 'X1_0_0', 'X2_0_0', ... + Dahlia uses the following naming scheme for arbitrary variables `X`, `Y`: + Memory1D: `X0`, `Y0` + Memory2D: `X0_0`, `Y0_0` + Memory3D: `X0_0_0`, `Y0_0_0` """ assert type in DahliaNameExtension, f'{name} with {type} is not supported yet.' - return name + DahliaNameExtension[type] + return ''.join((name, DahliaNameExtension[type])) - def get_dahlia_declaration(self, function_name, cells, args, attrs): - """ - Returns the corresponding name, Dahlia function type, and op (if it is a binary op, otherwise None). - If the function type isn't supported, fails with an assertion. - """ - input_type = cells[0].primitive.type - function = name = op = None - if function_name in BuiltInBinaryOps: - op = BuiltInBinaryOps[function_name] - function, name = broadcast, function_name - elif function_name in RelayFunctionCalls: - function = RelayFunctionCalls[function_name] - name = function.__name__ - else: - assert False, f'{function_name} with type {input_type} is not supported.' - return DahliaDeclaration(component_name=self.relay_id(name), decl_name=self.id(name), - op=op, inputs=args, attributes=attrs, function=function) - - def visit_var(self, var): + def visit_var(self, var) -> FCell: name = self.relay_id(var.name_hint) - # Do not add duplicate primitives to main. - if self.main.contains_primitive(name): return cell + if name in self.main.cells: return cell data, type, data_type = get_memory_parameters(var.type_annotation) - dahlia_name = self.dahlia_name(name, type) - return FCell(dahlia_name=dahlia_name, + return FCell(dahlia_name=self.dahlia_name(name, type), primitive=FPrimitive(name=name, data=data, data_type=data_type, type=type)) def visit_let(self, let): values, output = self.visit(let.value), self.visit(let.var) if isinstance(values, list): - for value in values: - if not value.is_dahlia_declaration(): continue - value.dahlia_declaration.output = output - value.dahlia_declaration.invoke() + for value in flatten(values): + if value.is_relay_function(): value.relay_function.output = output return [self.visit(let.body), values] - def visit_constant(self, const): + def visit_constant(self, const) -> FCell: # Note: We're currently treating constants defined in a `let` statement in Relay IR as 1D Memory. - type, shape = const.data.dtype, const.data.shape - name, data = self.id("const"), [get_bitwidth(type), int(const.data.asnumpy())] - data_type = get_memory_parameters(type) - return FCell(primitive=FPrimitive(name=name, data=data, data_type=data_type, type=PrimitiveType.Constant)) + # type, shape = const.data.dtype, const.data.shape + pass - def visit_call(self, call): + def visit_call(self, call) -> List[FCell]: attributes = call.attrs cells, args = [], [] for arg in call.args: argument = self.visit(arg) cells.append(argument) args.append(argument) - cells.append(FCell(dahlia_declaration=self.get_dahlia_declaration(call.op.name, cells, args, call.attrs))) + # We are representing all function calls in Relay IR at the Dahlia level, which will then be lowered to FuTIL. + # Note, the Relay function's output is not defined until the `let` statement is visited. + function, name, op = GetRelayFunctionCall(call.op.name) + component_name = self.id(name) + relay_function_call = RelayFunctionCall(component_name=component_name, name=f'comp_{component_name}', + op=op, inputs=args, attributes=call.attrs, lowering_function=function) + cells.append(FCell(relay_function=relay_function_call)) return cells def visit_function(self, function): body = self.visit(function.body) - for cell in flatten(body): - self.main.add_cell(cell) - if not cell.is_dahlia_declaration(): continue - self.dahlia_components.append(cell.dahlia_declaration.program) + for cell in flatten(body): self.main.add_cell(cell) build_main_controls(self.main) - return pp_component(self.main) + return pp_lowered_relay_function(self.main) def relay_transforms(expr: Function) -> Function: """https://tvm.apache.org/docs/api/python/relay/transform.html""" - transform = tvm.transform.Sequential([ + transforms = tvm.transform.Sequential([ relay.transform.SimplifyExpr(), relay.transform.SimplifyInference(), - relay.transform.InferType() + relay.transform.InferType(), ]) mod = ir.IRModule.from_expr(expr) - mod['main'] = expr - mod = transform(mod) + mod = transforms(mod) return mod['main'] @@ -147,11 +111,9 @@ def lower_to_futil(program) -> str: program = relay_transforms(program) visitor = Relay2Futil() - PREAMBLE = """import "primitives/std.lib";""" + PREAMBLE = """import "primitives/std.lib";\n""" MAIN = visitor.visit(program) - DAHLIA_COMPONENTS = '\n'.join(visitor.dahlia_components) - NEWL = '\n\n' - return f'{PREAMBLE}{NEWL}{DAHLIA_COMPONENTS}{NEWL}{MAIN}' + return '\n'.join((PREAMBLE, MAIN)) if __name__ == '__main__': diff --git a/frontends/relay-futil/dahlia_functions.py b/frontends/relay-futil/dahlia_functions.py deleted file mode 100644 index 106c000205..0000000000 --- a/frontends/relay-futil/dahlia_functions.py +++ /dev/null @@ -1,337 +0,0 @@ -import subprocess -import os - -from tempfile import NamedTemporaryFile, TemporaryFile -from futil_ast import * -from pretty_print import * - -IMPORT_STATEMENT = """import "primitives/std.lib";\n""" -NO_ERR = "2>/dev/null" -CHARACTER_I = chr(ord('i')) -NEWL = '\n' - - -def lower_dahlia_program(prog, component_name): - """ - Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, - and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are - declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the respective component. - - Example: - ------ Dahlia, component name: ProcessX ------ - decl X: ubit<32>[4]; - ... - - ------------- Lower to FuTIL ----------------- - component ProcessX() -> () { - X = prim std_mem_d1_ext(32, 4, 2); - ... - } - - ------------- Externalize Pass --------------- - component ProcessX - (go: 1, clk: 1, X0_read_data: 32, X0_done: 1) -> - (done: 1, X0_addr0: 2, X0_write_data: 32, X0_write_en: 1, X0_clk: 1) { - ... - } - """ - program_string = '\n'.join(prog.splitlines()) - with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: - tf0.write(bytes(program_string, 'UTF-8')) - tf0.seek(0), tf1.seek(0), tf2.seek(0) - fuse_binary = os.environ['DAHLIA_EXEC'] if 'DAHLIA_EXEC' in os.environ else 'fuse' - command = f""" - {fuse_binary} {tf0.name} --lower -b=futil -n={component_name} > {tf1.name} {NO_ERR} \ - && cargo run -- {tf1.name} -l ../../ -p externalize > {tf2.name} {NO_ERR}""" - subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() - component = tf2.read().decode()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. - return component - - -def broadcast(declaration): - """ - https://numpy.org/doc/stable/user/basics.broadcasting.html - Implements array broadcasting: - Two dimensions are compatible when either (1) they're equal, or (2) one of them is 1. - It is not required that both operands have the same number of dimensions either. - - When lowering from Relay IR, we are guaranteed the arrays are compatible for broadcasting. - - Variable names for indexing through the array begin with `i`, and continue alphabetically. - - Example: - first operand: 64 x 1 x 32 - second operand: 16 x 1 - result: 64 x 16 x 32 - -> - for (i = 0...64) { - for (j = 0..16) { - for (k = 0..32) { - result[i][j][k] := op1[i][0][k] op op2[j][0]; - ... - """ - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - - op1_dims, op2_dims, res_dims = op1.type, op2.type, res.type - op1_sizes, op2_sizes, res_sizes = [], [], [] - # Get memory sizes in reversed order. - for i in reversed(range(0, op1_dims)): op1_sizes.append(op1.data[i + 1]) - for i in reversed(range(0, op2_dims)): op2_sizes.append(op2.data[i + 1]) - for i in reversed(range(0, res_dims)): res_sizes.append(res.data[i + 1]) - - # Gets the last variable name since we will compare sizes in the reverse direction. - variable_name = chr(ord(CHARACTER_I) + res_dims - 1) - # Determine the value at the indices in reverse order. - # For each dimension, this will either be `[x]` for index_variable `x`, or `[0]` - # depending on the relationship between the dimensions sizes. - op1_indices, op2_indices, res_indices = [], [], [] - for i in range(0, len(res_sizes)): - current_dimension, index_zero = f'[{variable_name}]', '[0]' - res_indices.append(current_dimension) - if op1_dims > op2_dims and len(op2_sizes) <= i: - op1_indices.append(current_dimension) - continue - if op2_dims > op1_dims and len(op1_sizes) <= i: - op2_indices.append(current_dimension) - continue - if op1_sizes[i] == op2_sizes[i]: - op1_indices.append(current_dimension) - op2_indices.append(current_dimension) - elif op1_sizes[i] > op2_sizes[i]: - op1_indices.append(current_dimension) - op2_indices.append(index_zero) - else: # op2_sizes[i] < op1_sizes[i] - op1_indices.append(index_zero) - op2_indices.append(current_dimension) - variable_name = next_character(variable_name, -1) - - # Resulting index in the nested for loop, e.g. for `op1[i][j][0][k]`, this is `[i][j][0][k]`. - op1_index = ''.join(reversed(op1_indices)) - op2_index = ''.join(reversed(op2_indices)) - res_index = ''.join(reversed(res_indices)) - loop_body = f'{res.name}{res_index} := {op1.name}{op1_index} {declaration.op} {op2.name}{op2_index};' - - program_body = pp_dahlia_loop(res, loop_body) - declarations = pp_dahlia_memory_declarations([res, op1, op2]) - program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) - - -def batch_flatten(declaration): - """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" - data, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = data.data[0], data.type - res_index_size1 = res.data[4] - - variable_name = CHARACTER_I - data_indices, res_indices = "", f'[{variable_name}]' - for i in range(0, num_dimensions): - # Determine loop body indices based on `axis` provided. - size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] - index = f'[{variable_name}]' - data_indices += index - variable_name = next_character(variable_name) - res_indices += f'[{variable_name}]' - - declarations = pp_dahlia_memory_declarations([data, res]) - let_flattened = f'let {variable_name}: ubit<{res_index_size1}> = 0;' - body = f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;" - program_body = pp_dahlia_loop(data, body) - program = f"""{declarations}{NEWL}{let_flattened}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) - - -def bias_add(declaration): - """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" - data, bias, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, num_dimensions = data.data[0], data.type - - axis_attribute = declaration.attributes.get_int("axis") - axis = num_dimensions - 1 if axis_attribute == -1 else axis_attribute - - variable_name = CHARACTER_I - data_indices = "" - for i in range(0, num_dimensions): - # Determine loop body indices based on `axis` provided. - size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] - index = f'[{variable_name}]' - if axis == i: bias_index = index - data_indices += index - variable_name = next_character(variable_name) - - declarations = pp_dahlia_memory_declarations([data, bias, res]) - body = (f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};") - program_body = pp_dahlia_loop(data, body) - return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) - - -# TODO(cgyurgyik): -# 1. This won't work for fixed point currently, since Dahlia -# will not take fixed point operands for the `>` operator. -# 2. Without signed bit array support, this is also meaningless. -def relu(declaration): - """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" - data, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = data.data[0], data.type - - declarations = pp_dahlia_memory_declarations([data, res]) - zero = '0.0' if data.data_type == 'ufix' else '0' - let_zero = f'let zero: {data.data_type}<{bitwidth}> = {zero};' - - indices = "" - variable_name = CHARACTER_I - for i in range(0, num_dimensions): - # Determine loop body indices. - indices += f'[{variable_name}]' - variable_name = next_character(variable_name) - - body = f"""if ({data.name}{indices} > zero) {{ {res.name}{indices} := {data.name}{indices}; }} - else {{ {res.name}{indices} := zero; }}""" - program_body = pp_dahlia_loop(data, body) - return lower_dahlia_program(f"""{declarations}{NEWL}{let_zero}{NEWL}{program_body}""", declaration.component_name) - - -# TODO(cgyurgyik): Similar to ReLU, this requires signed operands. -def negative(declaration): - """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" - op, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = op.data[0], op.type - - indices = "" - variable_name = CHARACTER_I - for i in range(0, num_dimensions): - # Determine loop body indices. - indices += f'[{variable_name}]' - variable_name = next_character(variable_name) - - declarations = pp_dahlia_memory_declarations([op, res]) - program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := -{op.name}{indices};""") - return lower_dahlia_program(f"""{declarations}{NEWL}{program_body}""", declaration.component_name) - - -# TODO(cgyurgyik): Similar to ReLU, this requires signed operands. -def sqrt(declaration): - """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" - op, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type - include_sqrt = f"""import "fxp_sqrt.h" {{ def sqrt(value: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" - - indices = "" - variable_name = CHARACTER_I - for i in range(0, num_dimensions): - # Determine loop body indices. - indices += f'[{variable_name}]' - variable_name = next_character(variable_name) - - declarations = pp_dahlia_memory_declarations([op, res]) - program_body = pp_dahlia_loop(op, f"""{res.name}{indices} := sqrt({op.name}{indices});""") - return lower_dahlia_program(f"""{include_sqrt}{NEWL}{declarations}{NEWL}{program_body}""", - declaration.component_name) - - -def expand_dims(declaration): - """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" - axis, num_newaxis = declaration.attributes.get_int("axis"), declaration.attributes.get_int("num_newaxis") - data, res = declaration.inputs[0].primitive, declaration.output.primitive - bitwidth, num_dimensions = data.data[0], data.type - - declarations = pp_dahlia_memory_declarations([data, res]) - - res_indices, data_indices = "", "" - variable_name = CHARACTER_I - for i in range(0, num_dimensions): - # Determine loop body indices. - index = f'[{variable_name}]' - res_indices += index - data_indices += index - if axis == i + 1: - for _ in range(0, num_newaxis): res_indices += '[0]' - variable_name = next_character(variable_name) - - program_body = pp_dahlia_loop(data, f'{res.name}{res_indices} := {data.name}{data_indices}') - program = f"""{declarations}{NEWL}{program_body}""" - return lower_dahlia_program(program, declaration.component_name) - - -def batch_matmul(declaration): - """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, M1_size0, M1_size1, M1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] - M1_index_size0, M1_index_size1, M1_index_size2 = op1.data[4], op1.data[5], op1.data[6] - M2_size0, M2_size1, M2_size2 = op2.data[1], op2.data[2], op2.data[3] - M2_index_size0, M2_index_size1, M2_index_size2 = op2.data[4], op2.data[5], op2.data[6] - # 1. Get transpose of second operand. - # 2. Create temporary value `t`. Then, t = op1 * transpose(op2). - # 3. Copy temporary value to return value.* - # * This third step may not be necessary, but trying to conduct the matrix multiply - # directly with the return value declared resulted in incorrect outputs. - declarations = pp_dahlia_memory_declarations([res, op1, op2]) - program = f"""{declarations} - let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size2}][{M2_size1}]; - let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; - for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ - for (let i: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ - for (let j: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ - transpose_{op2.name}[batch][j][i] := {op2.name}[batch][i][j]; - }} - }} - }} - - for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ - for (let i: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ - for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ - for (let k: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ - let product = {op1.name}[batch][i][k] * transpose_{op2.name}[batch][k][j]; - }} combine {{ - temporary_{res.name}[batch][i][j] += product; - }} - }} - }} - }} - - for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ - for (let i: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ - for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ - {res.name}[batch][i][j] := temporary_{res.name}[batch][i][j]; - }} - }} - }} - """ - return lower_dahlia_program(program, declaration.component_name) - - -# TODO(cgyurgyik): Similar to batch_matmul, this requires a temporary memory to store the output -# of the matrix multiply. Otherwise, the values aren't computed properly. Look deeper into this. -def dense(declaration): - """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" - # TODO(cgyurgyik): Add support for `units`. - units = declaration.attributes.get_int("units") - op1, op2, res = declaration.inputs[0].primitive, declaration.inputs[1].primitive, declaration.output.primitive - bitwidth, M1_size0, M1_size1 = op1.data[0], op1.data[1], op1.data[2] - M1_index_size0, M1_index_size1 = op1.data[3], op1.data[4] - M2_size0, M2_size1, M2_index_size0, M2_index_size1 = op2.data[1], op2.data[2], op2.data[3], op2.data[4] - program = f""" - {pp_dahlia_memory_declarations([res, op1, op2])} - let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size1}][{M2_size0}]; - let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; - for (let i: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ - for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ - transpose_{op2.name}[j][i] := {op2.name}[i][j]; - }} - }} - - for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ - for (let j: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ - for (let k: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ - let product = {op1.name}[i][k] * transpose_{op2.name}[k][j]; - }} combine {{ - temporary_{res.name}[i][j] += product; - }} - }} - }} - - for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ - for (let j: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ - {res.name}[i][j] := temporary_{res.name}[i][j]; - }} - }} - """ - return lower_dahlia_program(program, declaration.component_name) diff --git a/frontends/relay-futil/dahlia_lowering.py b/frontends/relay-futil/dahlia_lowering.py new file mode 100644 index 0000000000..6b83a28784 --- /dev/null +++ b/frontends/relay-futil/dahlia_lowering.py @@ -0,0 +1,515 @@ +import subprocess +import os + +from tempfile import NamedTemporaryFile, TemporaryFile +from futil_ast import * + +IMPORT_STATEMENT = """import "primitives/std.lib";\n""" +NO_ERR = "2>/dev/null" +NEWL = '\n' +CHARACTER_I = chr(ord('i')) # Starting index variable name for Dahlia array iteration. + + +def next_character(ch, dir=1): + """ + Returns the next character after 'ch'. + If `dir` is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. + """ + return chr(ord(ch) + 1) if dir > 0 else chr(ord(ch) - 1) + + +def PPDahliaMemoryDeclarations(relay_function): + """ + Pretty print for Dahlia memory declarations, e.g. + `decl X: ubit<32> [1][10];` + """ + cell_list = relay_function.inputs + cell_list.append(relay_function.output) + + declarations = [] + for cell in cell_list: + declaration = cell.primitive + declaration_str = f'decl {declaration.name}: {declaration.data_type}<{declaration.data[0]}>' + for i in range(0, declaration.type): declaration_str += f'[{declaration.data[i + 1]}]' + declarations.append(declaration_str + ";") + return '\n'.join(declarations) + + +def PPDahliaLoop(relay_function, body, num_dimensions, data=None): + """ + Returns an iteration over data with `body` as the work done within the nested loop(s). + Many tensor functions share the same control flow: (1) Iterate `num_dimensions` times, and (2) do some work in body. + For example, if `data` is a 2D primitive of size (M, N) and body == `X;`, then this will return: + + ``` + for (let i: ubit = 0..M) { + for (let j: ubit = 0..N) { + X; + } + } + ``` + + Notes: + If `data` is provided, it will be used to determine the `num_dimensions` as well as the corresponding bitwidths + and memory sizes. This occurs only in special cases; otherwise, the `output` of the `relay_function` will + determine these. + """ + variable_name = CHARACTER_I + program = [] + SPACING = '' + output = relay_function.output.primitive if data == None else data + for i in range(0, num_dimensions): + size, index_size = output.data[i + 1], output.data[i + num_dimensions + 1] + program.append(f'{SPACING}for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') + variable_name = next_character(variable_name) + SPACING += ' ' + program.append(f'{SPACING}{body}') + + for i in range(0, num_dimensions): + SPACING = SPACING[:-2] + program.append(SPACING + '}') + return '\n'.join(program) + + +def LowerDahliaProgramToFuTIL(relay_function, dahlia_body, dahlia_imports=None): + """ + Takes in a string representation of a Dahlia program, lowers it to FuTIL with the given `component_name`, + and applies the `externalize` pass. This pass exposes the inputs and outputs of primitive types that are + declared external, e.g. `std_mem_d1_ext`, and places them in the inputs and outputs of the respective component. + + Example: + ------ Dahlia, component name: ProcessX ------ + import "foo.h" { ... } + decl X: ubit<32>[4]; + ... + + ------------- Lower to FuTIL ----------------- + component ProcessX() -> () { + X = prim std_mem_d1_ext(32, 4, 2); + ... + } + + ------------- Externalize Pass --------------- + component ProcessX + (go: 1, clk: 1, X0_read_data: 32, X0_done: 1) -> + (done: 1, X0_addr0: 2, X0_write_data: 32, X0_write_en: 1, X0_clk: 1) { + ... + } + """ + if dahlia_imports == None: dahlia_imports = '' + program_string = '\n'.join((dahlia_imports, PPDahliaMemoryDeclarations(relay_function), dahlia_body)) + + with NamedTemporaryFile() as tf0, NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2: + tf0.write(bytes(program_string, 'UTF-8')) + tf0.seek(0), tf1.seek(0), tf2.seek(0) + fuse_binary = os.environ['DAHLIA_EXEC'] if 'DAHLIA_EXEC' in os.environ else 'fuse' + command = f""" + {fuse_binary} {tf0.name} --lower -b=futil -n={relay_function.component_name} > {tf1.name} {NO_ERR} \ + && fud e --from futil {tf1.name} --to futil-externalize > {tf2.name} {NO_ERR}""" + subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate() + component = tf2.read().decode()[len(IMPORT_STATEMENT):] # Skip over importing the primitives library. + return component + + +#################################################################################################### +################## Dahlia Implementations for Relay Function Calls ################################# +#################################################################################################### + +def broadcast(function: RelayFunctionCall): + """ + https://numpy.org/doc/stable/user/basics.broadcasting.html + Implements array broadcasting: + Two dimensions are compatible when either (1) they're equal, or (2) one of them is `1`. + It is not required that both operands have the same number of dimensions either. + - When lowering from Relay IR, we are guaranteed the arrays are compatible for broadcasting. + - Variable names for indexing through the array begin with `i`, and continue alphabetically. + + Example: + first operand: 64 x 1 x 32 + second operand: 16 x 1 + result: 64 x 16 x 32 + -> + for (i = 0...64) { + for (j = 0..16) { + for (k = 0..32) { + result[i][j][k] := op1[i][0][k] op op2[j][0]; + ... + """ + op1, op2, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive + op1_dims, op2_dims, res_dims = op1.type, op2.type, res.type + op1_sizes, op2_sizes, res_sizes = [], [], [] + # Get memory sizes in reversed order. + for i in reversed(range(0, op1_dims)): op1_sizes.append(op1.data[i + 1]) + for i in reversed(range(0, op2_dims)): op2_sizes.append(op2.data[i + 1]) + for i in reversed(range(0, res_dims)): res_sizes.append(res.data[i + 1]) + + # Gets the last variable name since we will compare sizes in the reverse direction. + variable_name = chr(ord(CHARACTER_I) + res_dims - 1) + # Determine the value at the indices in reverse order. + # For each dimension, this will either be `[x]` for index_variable `x`, or `[0]` + # depending on the relationship between the dimensions sizes. + op1_indices, op2_indices, res_indices = [], [], [] + for i in range(0, len(res_sizes)): + current_dimension, index_zero = f'[{variable_name}]', '[0]' + res_indices.append(current_dimension) + if op1_dims > op2_dims and len(op2_sizes) <= i: + op1_indices.append(current_dimension) + elif op2_dims > op1_dims and len(op1_sizes) <= i: + op2_indices.append(current_dimension) + elif op1_sizes[i] == op2_sizes[i]: + op1_indices.append(current_dimension) + op2_indices.append(current_dimension) + elif op1_sizes[i] > op2_sizes[i]: + op1_indices.append(current_dimension) + op2_indices.append(index_zero) + else: # op2_sizes[i] < op1_sizes[i] + op1_indices.append(index_zero) + op2_indices.append(current_dimension) + variable_name = next_character(variable_name, -1) + + # Resulting index in the nested for loop, e.g. for `op1[i][j][0][k]`, this is `[i][j][0][k]`. + op1_index = ''.join(reversed(op1_indices)) + op2_index = ''.join(reversed(op2_indices)) + res_index = ''.join(reversed(res_indices)) + loop_body = f'{res.name}{res_index} := {op1.name}{op1_index} {function.op} {op2.name}{op2_index};' + + return LowerDahliaProgramToFuTIL(function, PPDahliaLoop(function, loop_body, num_dimensions=res_dims)) + + +def batch_flatten(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_flatten""" + data, res = function.inputs[0].primitive, function.output.primitive + bitwidth, num_dimensions = res.data[0], data.type + res_index_size1 = res.data[4] + + variable_name = CHARACTER_I + data_indices, res_indices = "", f'[{variable_name}]' + for i in range(0, num_dimensions): + # Determine loop body indices based on `axis` provided. + size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] + index = f'[{variable_name}]' + data_indices += index + variable_name = next_character(variable_name) + res_indices += f'[{variable_name}]' + + let_flattened = f'let {variable_name}: ubit<{res_index_size1}> = 0;' + body = f"{res.name}{res_indices} := {data.name}{data_indices}; {variable_name} := {variable_name} + 1;" + program_body = '\n'.join((let_flattened, PPDahliaLoop(function, body, num_dimensions, data))) + return LowerDahliaProgramToFuTIL(function, program_body) + + +def bias_add(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.bias_add""" + data, bias, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive + bitwidth, num_dimensions = data.data[0], data.type + + axis_attribute = function.attributes.get_int("axis") + axis = num_dimensions - 1 if axis_attribute == -1 else axis_attribute + + variable_name = CHARACTER_I + data_indices = "" + for i in range(0, num_dimensions): + # Determine loop body indices based on `axis` provided. + size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] + index = f'[{variable_name}]' + if axis == i: bias_index = index + data_indices += index + variable_name = next_character(variable_name) + + body = f"{res.name}{data_indices} := {data.name}{data_indices} + {bias.name}{bias_index};" + return LowerDahliaProgramToFuTIL(function, PPDahliaLoop(function, body, num_dimensions)) + + +# TODO(cgyurgyik): +# 1. This won't work for fixed point currently, since Dahlia +# will not take fixed point operands for the `>` operator. +# 2. Without signed bit array support, this is also meaningless. +def relu(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.relu""" + data, res = function.inputs[0].primitive, function.output.primitive + bitwidth, num_dimensions, data_type = data.data[0], data.type, data.data_type + + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + let_zero = f'let zero: {data_type}<{bitwidth}> = {zero};' + + indices = "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + indices += f'[{variable_name}]' + variable_name = next_character(variable_name) + + body = f"""if ({data.name}{indices} > zero) {{ {res.name}{indices} := {data.name}{indices}; }} + else {{ {res.name}{indices} := zero; }}""" + program_body = '\n'.join((let_zero, PPDahliaLoop(function, body, num_dimensions))) + return LowerDahliaProgramToFuTIL(function, program_body) + + +# TODO(cgyurgyik): Similar to ReLU, this requires signed operands. +def negative(function): + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.negative""" + op, res = function.inputs[0].primitive, function.output.primitive + bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type + + indices = "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + indices += f'[{variable_name}]' + variable_name = next_character(variable_name) + + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + program_body = PPDahliaLoop(function, f"""{res.name}{indices} := {zero} - {op.name}{indices};""", num_dimensions) + return LowerDahliaProgramToFuTIL(function, program_body) + + +def sqrt(function): + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.sqrt""" + op, res = function.inputs[0].primitive, function.output.primitive + bitwidth, num_dimensions, data_type = op.data[0], op.type, op.data_type + include_sqrt = f"""import "fxp_sqrt.h" {{ def sqrt(value: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" + + indices = "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + indices += f'[{variable_name}]' + variable_name = next_character(variable_name) + + program_body = PPDahliaLoop(function, f"""{res.name}{indices} := sqrt({op.name}{indices});""", num_dimensions) + return LowerDahliaProgramToFuTIL(function, program_body, include_sqrt) + + +def expand_dims(function): + """https://tvm.apache.org/docs/api/python/relay/index.html#tvm.relay.expand_dims""" + axis, num_newaxis = function.attributes.get_int("axis"), function.attributes.get_int("num_newaxis") + data, res = function.inputs[0].primitive, function.output.primitive + bitwidth, num_dimensions = data.data[0], data.type + + res_indices, data_indices = "", "" + variable_name = CHARACTER_I + for i in range(0, num_dimensions): + # Determine loop body indices. + index = f'[{variable_name}]' + res_indices += index + data_indices += index + if axis == i + 1: + for _ in range(0, num_newaxis): res_indices += '[0]' + variable_name = next_character(variable_name) + + program_body = PPDahliaLoop(function, f'{res.name}{res_indices} := {data.name}{data_indices}', num_dimensions, data) + return LowerDahliaProgramToFuTIL(function, program_body) + + +def batch_matmul(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.batch_matmul""" + op1, op2, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive + bitwidth, M1_size0, M1_size1, M1_size2 = op1.data[0], op1.data[1], op1.data[2], op1.data[3] + M1_index_size0, M1_index_size1, M1_index_size2 = op1.data[4], op1.data[5], op1.data[6] + M2_size0, M2_size1, M2_size2 = op2.data[1], op2.data[2], op2.data[3] + M2_index_size0, M2_index_size1, M2_index_size2 = op2.data[4], op2.data[5], op2.data[6] + # 1. Get transpose of second operand. + # 2. Create temporary value `t`. Then, t = op1 * transpose(op2). + # 3. Copy temporary value to return value.* + # * This third step may not be necessary, but trying to conduct the matrix multiply + # directly with the return value declared resulted in incorrect outputs. + program_body = f""" + let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size0}][{M2_size2}][{M2_size1}]; + let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M1_size1}][{M2_size1}]; + for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let i: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + for (let j: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ + transpose_{op2.name}[batch][j][i] := {op2.name}[batch][i][j]; + }} + }} + }} + + for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let i: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + for (let k: ubit<{M2_index_size2}> = 0..{M2_size2}) {{ + let product = {op1.name}[batch][i][k] * transpose_{op2.name}[batch][k][j]; + }} combine {{ + temporary_{res.name}[batch][i][j] += product; + }} + }} + }} + }} + + for (let batch: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let i: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + {res.name}[batch][i][j] := temporary_{res.name}[batch][i][j]; + }} + }} + }} + """ + return LowerDahliaProgramToFuTIL(function, program_body) + + +# TODO(cgyurgyik): Similar to batch_matmul, this requires a temporary memory to store the output +# of the matrix multiply. Otherwise, the values aren't computed properly. Look deeper into this. +def dense(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.dense""" + op1, op2, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive + bitwidth, M1_size0, M1_size1 = op1.data[0], op1.data[1], op1.data[2] + M1_index_size0, M1_index_size1 = op1.data[3], op1.data[4] + M2_size0, M2_size1, M2_index_size0, M2_index_size1 = op2.data[1], op2.data[2], op2.data[3], op2.data[4] + program = f""" + let transpose_{op2.name}: {op2.data_type}<{bitwidth}>[{M2_size1}][{M2_size0}]; + let temporary_{res.name}: {res.data_type}<{bitwidth}>[{M1_size0}][{M2_size0}]; + for (let i: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ + for (let j: ubit<{M2_index_size1}> = 0..{M2_size1}) {{ + transpose_{op2.name}[j][i] := {op2.name}[i][j]; + }} + }} + + for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let j: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ + for (let k: ubit<{M1_index_size1}> = 0..{M1_size1}) {{ + let product = {op1.name}[i][k] * transpose_{op2.name}[k][j]; + }} combine {{ + temporary_{res.name}[i][j] += product; + }} + }} + }} + + for (let i: ubit<{M1_index_size0}> = 0..{M1_size0}) {{ + for (let j: ubit<{M2_index_size0}> = 0..{M2_size0}) {{ + {res.name}[i][j] := temporary_{res.name}[i][j]; + }} + }} + """ + return LowerDahliaProgramToFuTIL(function, program) + + +# TODO(cgyurgyik): Currently, only supports a small subset (namely those used in our VGG net and MLP net examples). +def softmax(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.softmax""" + assert False, "Unimplemented." + op, res = function.inputs[0].primitive, function.output.primitive + axis = function.attributes.get_int("axis") + data_type = op.data_type + assert op.type == PrimitiveType.Memory2D, f'nn.softmax with pritmive type Memory{op.type}D is not supported.' + assert axis == -1 or axis == 1, f'nn.softmax with axis = {axis} is not supported.' + bitwidth, size0, size1, index_size0, index_size1 = op.data[0], op.data[1], op.data[2], op.data[3], op.data[4] + + import_exp = f"""import "std_exp.h" {{ def exp(x: {data_type}<{bitwidth}>): {data_type}<{bitwidth}>; }}""" + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + program_body = f""" + for (let i: ubit<{index_size0}> = 0..{size0}) {{ + let {op.name}_expsum: {data_type}<{bitwidth}> = {zero}; + for (let j: ubit<{index_size1}> = 0..{size1}) {{ + {op.name}_expsum += exp({op.name}[i][j]); + }} + for (let k: ubit<{index_size1}> = 0..{size1}) {{ + {res.name}[i][k] := exp({op.name}[i][k]); + --- + {res.name}[i][k] := {res.name}[i][k] / {op.name}_expsum; + }} + }} + """ + return LowerDahliaProgramToFuTIL(function, program_body, import_exp) + + +def max_pool2d(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.max_pool2d""" + data, res = function.inputs[0].primitive, function.output.primitive + + strides = function.attributes.get_int_tuple("strides") + pool_size = function.attributes.get_int_tuple("pool_size") + layout = function.attributes.get_str("layout") + ceil_mode = function.attributes.get_int("ceil_mode") + assert layout == 'NCHW', f"Layout \'{layout}\' is not currently supported for nn.max_pool2d; please use `NCHW`" + assert ceil_mode == False, "`ceil_mode` is not currently supported for nn.max_pool2d" + bitwidth, data_type = data.data[0], data.data_type + size0, size1, size2, size3 = res.data[1], res.data[2], res.data[3], res.data[4] + + program_body = f""" + for (let b: ubit<32> = 0..{size0}) {{ + for (let c: ubit<32> = 0..{size1}) {{ + for (let y: ubit<32> = 0..{size2}) {{ + for (let x: ubit<32> = 0..{size3}) {{ + let stride_y: ubit<32> = y * {strides[0]}/*strides[0]*/; + let stride_x: ubit<32> = x * {strides[1]}/*strides[1]*/; + + let max: {data_type}<{bitwidth}> = {data.name}[b][c][stride_y][stride_x]; + for (let m: ubit<32> = 0..{pool_size[0]}/*pool_size[0]*/) {{ + for (let n: ubit<32> = 0..{pool_size[1]}/*pool_size[1]*/) {{ + let pool_y: ubit<32> = stride_y + m; + let pool_x: ubit<32> = stride_x + n; + let current: {data_type}<{bitwidth}> = {data.name}[b][c][pool_y][pool_x]; + if (current > max) {{ max := current; }} + }} + }} + {res.name}[b][c][y][x] := max; + }} + }} + }} + }} + """ + return LowerDahliaProgramToFuTIL(function, program_body) + + +# Only supports a small subset of the `conv2d` function. For example, +# dilation and grouped convolution are not supported. +def conv2d(function): + """https://tvm.apache.org/docs/api/python/relay/nn.html#tvm.relay.nn.conv2d""" + data, weight, res = function.inputs[0].primitive, function.inputs[1].primitive, function.output.primitive + + strides = function.attributes.get_int_tuple("strides") + kernel_size = function.attributes.get_int_tuple("kernel_size") + channels = function.attributes.get_int("channels") + bitwidth, data_type = data.data[0], data.data_type + size0, size1, size2, size3 = res.data[1], res.data[2], res.data[3], res.data[4] + + zero = '0.0' if data_type == 'ufix' or data_type == 'fix' else '0' + program_body = f""" + for (let b: ubit<32> = 0..{size0}) {{ + for (let c: ubit<32> = 0..{size1}) {{ + for (let y: ubit<32> = 0..{size2}) {{ + for (let x: ubit<32> = 0..{size3}) {{ + let sum: {data_type}<{bitwidth}> = {zero}; + + for (let k: ubit<32> = 0..{channels}) {{ + for (let dy: ubit<32> = 0..{kernel_size[1]}/*kernel_size[1]*/) {{ + for (let dx: ubit<32> = 0..{kernel_size[0]}/*kernel_size[0]*/) {{ + let kernel_y: ubit<32> = (/*strides[0]*/{strides[0]} * y) + dy; + let kernel_x: ubit<32> = (/*strides[1]*/{strides[1]} * x) + dx; + }} combine {{ sum += {data.name}[b][k][kernel_y][kernel_x] * {weight.name}[c][k][dy][dx]; }} + }} + }} + {res.name}[b][c][y][x] := sum; + }} + }} + }} + }} + """ + return LowerDahliaProgramToFuTIL(function, program_body) + + +# Mapping from Relay function names to their respective Dahlia lowering. +RelayFunctionCalls = {'nn.dense': dense, 'nn.batch_flatten': batch_flatten, 'nn.batch_matmul': batch_matmul, + 'nn.bias_add': bias_add, 'nn.relu': relu, 'nn.softmax': softmax, 'nn.max_pool2d': max_pool2d, + 'nn.conv2d': conv2d, 'negative': negative, 'expand_dims': expand_dims, 'sqrt': sqrt} + +# Mapping from Relay binary calls to the respective Dahlia operator. +BuiltInBinaryOps = {'add': '+', 'divide': '/', 'multiply': '*', 'subtract': '-'} + + +def GetRelayFunctionCall(function_name) -> RelayFunctionCall: + """ + Returns the corresponding name, function, and `op` type (if it is a binary op, otherwise None) + of the Relay function call. If the function call isn't supported, fails with an assertion. + """ + function = name = op = None + assert function_name in BuiltInBinaryOps or function_name in RelayFunctionCalls, \ + f'{function_name} is not supported for lowering from Relay IR to FuTIL.' + if function_name in BuiltInBinaryOps: + op = BuiltInBinaryOps[function_name] + function = broadcast + name = function_name + else: + function = RelayFunctionCalls[function_name] + name = function.__name__ + return function, name, op diff --git a/frontends/relay-futil/example.py b/frontends/relay-futil/example.py index 0a986e9453..078e90f248 100644 --- a/frontends/relay-futil/example.py +++ b/frontends/relay-futil/example.py @@ -16,6 +16,7 @@ def tensor_subtract(): return relay.Function([x, y], relay.subtract(x, y)) +# Trying to read in a function that uses `expand_dims` with relay.fromtext() leads to some peculiar errors. def expand_dims(): x = relay.var('x', shape=[512], dtype='int32') return relay.Function([x], relay.expand_dims(x, axis=1, num_newaxis=2)) @@ -49,6 +50,22 @@ def dense(): return relay.Function([x, y], relay.nn.dense(x, y, units=10)) +def softmax(): + x = relay.var('x', shape=[1, 10], dtype='float32') + return relay.Function([x], relay.nn.softmax(x)) + + +def max_pool2d(): + data = relay.var('data', shape=[2, 2, 4, 4], dtype='int32') + return relay.Function([data], relay.nn.max_pool2d(data, padding=[0, 0, 0, 0], strides=[2, 2], pool_size=[2, 2])) + + +def conv2d(): + d = relay.var('data', shape=[5, 512, 14, 14], dtype='int32') + w = relay.var('weight', shape=[512, 512, 3, 3], dtype='int32') + return relay.Function([d, w], relay.nn.conv2d(d, w, padding=[1, 1, 1, 1], channels=512, kernel_size=[3, 3])) + + def mlp_net(): """The MLP test from Relay.""" from tvm.relay.testing import mlp @@ -58,11 +75,12 @@ def mlp_net(): def vgg_net(): """The VGG test from Relay.""" from tvm.relay.testing import vgg - return vgg.get_net(batch_size=1, image_shape=(3, 224, 224), num_classes=10, dtype='int32', num_layers=11, + return vgg.get_net(batch_size=5, image_shape=(3, 224, 224), num_classes=10, dtype='int32', num_layers=13, batch_norm=True) -ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, bias_add, relu, dense, mlp_net, vgg_net] +ALL_FUNCS = [add, tensor_subtract, expand_dims, batch_flatten, batch_matmul, + bias_add, relu, dense, softmax, conv2d, max_pool2d, mlp_net, vgg_net] FUNC_NAMES = list(map(lambda x: x.__name__, ALL_FUNCS)) diff --git a/frontends/relay-futil/futil_ast.py b/frontends/relay-futil/futil_ast.py index 03f2e8fa0e..e1194a826c 100644 --- a/frontends/relay-futil/futil_ast.py +++ b/frontends/relay-futil/futil_ast.py @@ -4,6 +4,7 @@ from types import FunctionType from enum import Enum, IntEnum + # Note: The integer value N for Memory with dimension N is used; these should remain unchanged. class PrimitiveType(IntEnum): Memory1D = 1 @@ -110,65 +111,43 @@ class FComponent: Represents a FuTIL component. ''' name: str - cells: List[Cell] # Instantiated sub-components. - wires: List[FConnection] # Wire connections between components. + wires = [] # Wire connections between components. + cells = {} # Instantiated sub-components. This is a mapping from {`dahlia_name`, FCell}. controls: FControl = None # Control statement for this component. signature: FSignature = None # Input and output ports. - def contains_primitive(self, name: str): - ''' - Determines whether this component contains a primitive with the given name. - ''' - # TODO(cgyurgyik): Rethink data structure here. - for cell in self.cells: - if not cell.is_primitive(): continue - if cell.primitive.name == name: return True - return False - def add_cell(self, subcomponent: Cell): ''' Appends a subcomponent to this component's list of FuTIL cells. ''' - if not subcomponent.is_primitive(): - self.cells.append(subcomponent) - return - if self.contains_primitive(subcomponent.primitive.name): return - self.cells.append(subcomponent) + if subcomponent == None: return + if subcomponent.is_primitive(): + self.cells[subcomponent.primitive.name] = subcomponent + elif subcomponent.is_relay_function(): + self.cells[subcomponent.relay_function.name] = subcomponent @dataclass -class DahliaDeclaration: - decl_name: str +class RelayFunctionCall: + """ + Represents a Relay function call. This will eventually be translated to Dahlia and subsequently lowered to FuTIL. + """ + name: str component_name: str - op: str = None + op: str = None # Binary operation associated with the Relay function call, if it exists. + attributes: tvm.ir.Attrs = None # Attributes associated with the Relay function call, e.g. `axis`, `padding`. + lowering_function: FunctionType = None # The function used to convert the Dahlia representation to FuTIL. inputs: List[Cell] = None output: Cell = None - attributes: tvm.ir.Attrs = None - function: FunctionType = None - program: str = None - - def invoke(self): - self.program = self.function(self) - - -@dataclass -class FDeclaration: - ''' - Represents a FuTIL declaration. - ''' - name: str - component: FComponent = None @dataclass class FCell(Cell): dahlia_name: str = None primitive: FPrimitive = None - declaration: FDeclaration = None - dahlia_declaration: DahliaDeclaration = None + relay_function: RelayFunctionCall = None + # TODO(cgyurgyik): Is there a better way to do this, such as std::variant in C++? def is_primitive(self): return self.primitive != None - def is_declaration(self): return self.declaration != None - - def is_dahlia_declaration(self): return self.dahlia_declaration != None + def is_relay_function(self): return self.relay_function != None diff --git a/frontends/relay-futil/pretty_print.py b/frontends/relay-futil/pretty_print.py index 7a57e6e6b7..2ce59f4139 100644 --- a/frontends/relay-futil/pretty_print.py +++ b/frontends/relay-futil/pretty_print.py @@ -2,14 +2,14 @@ import textwrap -def mk_block(decl, contents, indent=2): +def pp_block(decl, contents, indent=2): """Format a block like this: decl { contents } where `decl` is one line but contents can be multiple lines. """ - return decl + ' {\n' + textwrap.indent(contents, indent * ' ') + '\n}' + return ''.join((decl, ' {\n', textwrap.indent(contents, indent * ' '), '\n}')) def pp_component_signature(component: FComponent): @@ -39,7 +39,7 @@ def pp_connections(component: FConnection): wires = [] for wire in connection.group.wires: wires.append(pp_wire(wire)) - connections.append(mk_block(f'group {connection.group.name}', '\n'.join(wires))) + connections.append(pp_block(f'group {connection.group.name}', '\n'.join(wires))) return connections @@ -49,24 +49,57 @@ def pp_control(component: FComponent): groups = [] for group_name in control.stmts: groups.append(f'{group_name};') - ctrls.append(mk_block(control.name, '\n'.join(groups))) + ctrls.append(pp_block(control.name, '\n'.join(groups))) return ctrls -def pp_component(component: FComponent): +def pp_lowered_dahlia_components(component: FComponent): + relay_functions = [] + for cell in component.cells.values(): + if cell == None or not cell.is_relay_function(): continue + relay_call = cell.relay_function + relay_functions.append(relay_call.lowering_function(relay_call)) + return '\n'.join(relay_functions) + + +def pp_lowered_relay_function(component: FComponent): + """ + Pretty prints the main program. This consists of the following: + 1. Relay functions lowered from Dahlia -> FuTIL. + 2. The `main` component. + + Example: + ------------------------------------ + Input + ``` + fn (%x: int32, %y: int32) { let %z = add(%x, %y); %z } + ``` + ------------------------------------ + Output + ``` + component add(...) -> (...) { ... } + + component main() -> () { + ... + control { run_add; } + } + ``` + """ + relay_function_components = pp_lowered_dahlia_components(component) + subcomponents = [] - for cell in component.cells: - if cell == None: - continue + for cell in component.cells.values(): + if cell == None: continue subcomponents.append(pp_cell(cell)) - cells = mk_block("cells", '\n'.join(subcomponents)) + cells = pp_block("cells", '\n'.join(subcomponents)) inputs, outputs = pp_component_signature(component) - wires = mk_block("wires", '\n'.join(pp_connections(component))) - - controls = "" if component.controls == None else '\n'.join(pp_control(component)) - control = mk_block("control", controls) + wires = pp_block("wires", '\n'.join(pp_connections(component))) - return mk_block(f'component {component.name} ({inputs}) -> ({outputs})', '\n'.join([cells, wires, control])) + controls = '\n'.join(pp_control(component)) + control = pp_block("control", controls) + main_component = pp_block(f'component {component.name} ({inputs}) -> ({outputs})', + '\n'.join([cells, wires, control])) + return '\n'.join((relay_function_components, main_component)) def pp_cell(cell: FCell): @@ -100,59 +133,5 @@ def pp_cell(cell: FCell): if cell.primitive.type == PrimitiveType.BinOp: op = data[1] return f'{cell.primitive.name} = prim std_{op}({bitwidth});' - assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' - elif cell.is_declaration(): - return f'{cell.declaration.name} = {cell.declaration.component.name};' - elif cell.is_dahlia_declaration(): - return f'{cell.dahlia_declaration.decl_name} = {cell.dahlia_declaration.component_name};' - - -# Dahlia Pretty Printing. - -def next_character(ch, dir=1): - """ - Returns the next character after 'ch'. - If dir is positive, then will return 'ch' + 1. Otherwise, it will return 'ch' - 1. - """ - return chr(ord(ch) + dir) if dir > 0 else chr(ord(ch) - 1) - - -def pp_dahlia_memory_declarations(declaration_list): - declarations = [] - for decl in declaration_list: - decl_string = f'decl {decl.name}: {decl.data_type}<{decl.data[0]}>' - for i in range(0, decl.type): decl_string += f'[{decl.data[i + 1]}]' - declarations.append(f'{decl_string};') - return '\n'.join(declarations) - - -def pp_dahlia_loop(data, body): - """ - Returns an iteration over data with `body` as the work done within the nested loop(s). - Many tensor functions share the same control flow: (1) Iterate over `data`, and (2) do some work in body. - For example, if `data` is a 2D primitive of size (M, N) and body == `X;`, then this will return: - - ``` - for (let i: ubit = 0..M) { - for (let j: ubit = 0..N) { - X; - } - } - ``` - """ - variable_name = chr(ord('i')) - num_dimensions = data.type - - program = [] - SPACING = '' - for i in range(0, num_dimensions): - size, index_size = data.data[i + 1], data.data[i + num_dimensions + 1] - program.append(f'{SPACING}for (let {variable_name}: ubit<{index_size}> = 0..{size}) {{') - variable_name = next_character(variable_name) - SPACING += ' ' - program.append(f'{SPACING}{body}') - - for i in range(0, num_dimensions): - SPACING = SPACING[:-2] - program.append(f'{SPACING}}}') - return '\n'.join(program) + if cell.is_relay_function(): return f'{cell.relay_function.name} = {cell.relay_function.component_name};' + assert False, f'FCell pretty print unimplemented for {cell} with name {cell.primitive.name}' diff --git a/frontends/relay-futil/tests/add.expect b/frontends/relay-futil/tests/add.expect index f239d18b42..8c08e35f31 100644 --- a/frontends/relay-futil/tests/add.expect +++ b/frontends/relay-futil/tests/add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component add0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(1); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x.addr0 = add0.x0_addr0; - add0.x0_read_data = x.read_data; - y.addr0 = add0.y0_addr0; - add0.y0_read_data = y.read_data; - z.addr0 = add0.z0_addr0; - z.write_data = add0.z0_write_data; - z.write_en = add0.z0_write_en; - add0.z0_done = z.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x.addr0 = comp_add0.x0_addr0; + comp_add0.x0_read_data = x.read_data; + y.addr0 = comp_add0.y0_addr0; + comp_add0.y0_read_data = y.read_data; + z.addr0 = comp_add0.z0_addr0; + z.write_data = comp_add0.z0_write_data; + z.write_en = comp_add0.z0_write_en; + comp_add0.z0_done = z.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/batch_flatten.expect b/frontends/relay-futil/tests/batch_flatten.expect index 6927e4ad85..c1d01a7bae 100644 --- a/frontends/relay-futil/tests/batch_flatten.expect +++ b/frontends/relay-futil/tests/batch_flatten.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component batch_flatten(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { +component batch_flatten0(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_0_addr0: 1, x0_0_0_addr1: 2, x0_0_0_addr2: 2, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { cells { add0 = prim std_add(3); add1 = prim std_add(2); @@ -139,26 +139,26 @@ component main () -> () { cells { x1 = prim std_mem_d2(32, 1, 4, 1, 3); x = prim std_mem_d3(32, 1, 2, 2, 1, 2, 2); - batch_flatten0 = batch_flatten; + comp_batch_flatten0 = batch_flatten0; } wires { - group run_batch_flatten { - x.addr0 = batch_flatten0.x0_0_0_addr0; - batch_flatten0.x0_0_0_read_data = x.read_data; - x.addr1 = batch_flatten0.x0_0_0_addr1; - x.addr2 = batch_flatten0.x0_0_0_addr2; - x1.addr0 = batch_flatten0.x10_0_addr0; - x1.addr1 = batch_flatten0.x10_0_addr1; - x1.write_data = batch_flatten0.x10_0_write_data; - x1.write_en = batch_flatten0.x10_0_write_en; - batch_flatten0.x10_0_done = x1.done; - batch_flatten0.go = 1'd1; - run_batch_flatten[done] = batch_flatten0.done ? 1'd1; + group run_batch_flatten0 { + x.addr0 = comp_batch_flatten0.x0_0_0_addr0; + comp_batch_flatten0.x0_0_0_read_data = x.read_data; + x.addr1 = comp_batch_flatten0.x0_0_0_addr1; + x.addr2 = comp_batch_flatten0.x0_0_0_addr2; + x1.addr0 = comp_batch_flatten0.x10_0_addr0; + x1.addr1 = comp_batch_flatten0.x10_0_addr1; + x1.write_data = comp_batch_flatten0.x10_0_write_data; + x1.write_en = comp_batch_flatten0.x10_0_write_en; + comp_batch_flatten0.x10_0_done = x1.done; + comp_batch_flatten0.go = 1'd1; + run_batch_flatten0[done] = comp_batch_flatten0.done ? 1'd1; } } control { seq { - run_batch_flatten; + run_batch_flatten0; } } } diff --git a/frontends/relay-futil/tests/batch_matmul.expect b/frontends/relay-futil/tests/batch_matmul.expect index 0bf73d4754..93a95d5712 100644 --- a/frontends/relay-futil/tests/batch_matmul.expect +++ b/frontends/relay-futil/tests/batch_matmul.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component batch_matmul(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 3, a0_0_0_addr1: 3, a0_0_0_addr2: 3, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 3, b0_0_0_addr1: 3, b0_0_0_addr2: 3, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { +component batch_matmul0(go: 1, clk: 1, a0_0_0_read_data: 32, a0_0_0_done: 1, b0_0_0_read_data: 32, b0_0_0_done: 1, x0_0_0_read_data: 32, x0_0_0_done: 1) -> (done: 1, a0_0_0_addr0: 3, a0_0_0_addr1: 3, a0_0_0_addr2: 3, a0_0_0_write_data: 32, a0_0_0_write_en: 1, a0_0_0_clk: 1, b0_0_0_addr0: 3, b0_0_0_addr1: 3, b0_0_0_addr2: 3, b0_0_0_write_data: 32, b0_0_0_write_en: 1, b0_0_0_clk: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(3); @@ -402,31 +402,31 @@ component main () -> () { x = prim std_mem_d3(32, 4, 7, 7, 3, 3, 3); a = prim std_mem_d3(32, 4, 7, 5, 3, 3, 3); b = prim std_mem_d3(32, 4, 7, 5, 3, 3, 3); - batch_matmul0 = batch_matmul; + comp_batch_matmul0 = batch_matmul0; } wires { - group run_batch_matmul { - a.addr0 = batch_matmul0.a0_0_0_addr0; - batch_matmul0.a0_0_0_read_data = a.read_data; - a.addr1 = batch_matmul0.a0_0_0_addr1; - a.addr2 = batch_matmul0.a0_0_0_addr2; - b.addr0 = batch_matmul0.b0_0_0_addr0; - batch_matmul0.b0_0_0_read_data = b.read_data; - b.addr1 = batch_matmul0.b0_0_0_addr1; - b.addr2 = batch_matmul0.b0_0_0_addr2; - x.addr0 = batch_matmul0.x0_0_0_addr0; - x.addr1 = batch_matmul0.x0_0_0_addr1; - x.addr2 = batch_matmul0.x0_0_0_addr2; - x.write_data = batch_matmul0.x0_0_0_write_data; - x.write_en = batch_matmul0.x0_0_0_write_en; - batch_matmul0.x0_0_0_done = x.done; - batch_matmul0.go = 1'd1; - run_batch_matmul[done] = batch_matmul0.done ? 1'd1; + group run_batch_matmul0 { + a.addr0 = comp_batch_matmul0.a0_0_0_addr0; + comp_batch_matmul0.a0_0_0_read_data = a.read_data; + a.addr1 = comp_batch_matmul0.a0_0_0_addr1; + a.addr2 = comp_batch_matmul0.a0_0_0_addr2; + b.addr0 = comp_batch_matmul0.b0_0_0_addr0; + comp_batch_matmul0.b0_0_0_read_data = b.read_data; + b.addr1 = comp_batch_matmul0.b0_0_0_addr1; + b.addr2 = comp_batch_matmul0.b0_0_0_addr2; + x.addr0 = comp_batch_matmul0.x0_0_0_addr0; + x.addr1 = comp_batch_matmul0.x0_0_0_addr1; + x.addr2 = comp_batch_matmul0.x0_0_0_addr2; + x.write_data = comp_batch_matmul0.x0_0_0_write_data; + x.write_en = comp_batch_matmul0.x0_0_0_write_en; + comp_batch_matmul0.x0_0_0_done = x.done; + comp_batch_matmul0.go = 1'd1; + run_batch_matmul0[done] = comp_batch_matmul0.done ? 1'd1; } } control { seq { - run_batch_matmul; + run_batch_matmul0; } } } diff --git a/frontends/relay-futil/tests/bias_add.expect b/frontends/relay-futil/tests/bias_add.expect index 620da35d44..18ba0a8d0e 100644 --- a/frontends/relay-futil/tests/bias_add.expect +++ b/frontends/relay-futil/tests/bias_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component bias_add(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_0_0_addr0: 1, x0_0_0_0_addr1: 7, x0_0_0_0_addr2: 10, x0_0_0_0_addr3: 9, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 1, x10_0_0_0_addr1: 7, x10_0_0_0_addr2: 10, x10_0_0_0_addr3: 9, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { +component bias_add0(go: 1, clk: 1, bias0_read_data: 32, bias0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, bias0_addr0: 7, bias0_write_data: 32, bias0_write_en: 1, bias0_clk: 1, x0_0_0_0_addr0: 1, x0_0_0_0_addr1: 7, x0_0_0_0_addr2: 10, x0_0_0_0_addr3: 9, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 1, x10_0_0_0_addr1: 7, x10_0_0_0_addr2: 10, x10_0_0_0_addr3: 9, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(9); @@ -167,27 +167,31 @@ component main () -> () { x1 = prim std_mem_d4(32, 1, 64, 512, 256, 1, 7, 10, 9); x = prim std_mem_d4(32, 1, 64, 512, 256, 1, 7, 10, 9); bias = prim std_mem_d1(32, 64, 7); - bias_add0 = bias_add; + comp_bias_add0 = bias_add0; } wires { - group run_bias_add { - x.addr0 = bias_add0.x0_0_0_0_addr0; - bias_add0.x0_0_0_0_read_data = x.read_data; - x.addr1 = bias_add0.x0_0_0_0_addr1; - x.addr2 = bias_add0.x0_0_0_0_addr2; - bias.addr0 = bias_add0.bias0_addr0; - bias_add0.bias0_read_data = bias.read_data; - x1.addr0 = bias_add0.x10_0_0_0_addr0; - x1.write_data = bias_add0.x10_0_0_0_write_data; - x1.write_en = bias_add0.x10_0_0_0_write_en; - bias_add0.x10_0_0_0_done = x1.done; - bias_add0.go = 1'd1; - run_bias_add[done] = bias_add0.done ? 1'd1; + group run_bias_add0 { + x.addr0 = comp_bias_add0.x0_0_0_0_addr0; + comp_bias_add0.x0_0_0_0_read_data = x.read_data; + x.addr1 = comp_bias_add0.x0_0_0_0_addr1; + x.addr2 = comp_bias_add0.x0_0_0_0_addr2; + x.addr3 = comp_bias_add0.x0_0_0_0_addr3; + bias.addr0 = comp_bias_add0.bias0_addr0; + comp_bias_add0.bias0_read_data = bias.read_data; + x1.addr0 = comp_bias_add0.x10_0_0_0_addr0; + x1.addr1 = comp_bias_add0.x10_0_0_0_addr1; + x1.addr2 = comp_bias_add0.x10_0_0_0_addr2; + x1.addr3 = comp_bias_add0.x10_0_0_0_addr3; + x1.write_data = comp_bias_add0.x10_0_0_0_write_data; + x1.write_en = comp_bias_add0.x10_0_0_0_write_en; + comp_bias_add0.x10_0_0_0_done = x1.done; + comp_bias_add0.go = 1'd1; + run_bias_add0[done] = comp_bias_add0.done ? 1'd1; } } control { seq { - run_bias_add; + run_bias_add0; } } } diff --git a/frontends/relay-futil/tests/broadcast.expect b/frontends/relay-futil/tests/broadcast.expect index 84f5962b54..5eb74f0ca1 100644 --- a/frontends/relay-futil/tests/broadcast.expect +++ b/frontends/relay-futil/tests/broadcast.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_addr0: 2, x10_0_addr1: 2, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) { +component add0(go: 1, clk: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_0_read_data: 32, x20_0_0_done: 1, x30_0_0_read_data: 32, x30_0_0_done: 1) -> (done: 1, x10_0_addr0: 2, x10_0_addr1: 2, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_0_addr0: 2, x20_0_0_addr1: 1, x20_0_0_addr2: 1, x20_0_0_write_data: 32, x20_0_0_write_en: 1, x20_0_0_clk: 1, x30_0_0_addr0: 2, x30_0_0_addr1: 2, x30_0_0_addr2: 2, x30_0_0_write_data: 32, x30_0_0_write_en: 1, x30_0_0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(2); @@ -139,30 +139,30 @@ component main () -> () { x3 = prim std_mem_d3(32, 2, 2, 2, 2, 2, 2); x1 = prim std_mem_d2(32, 2, 2, 2, 2); x2 = prim std_mem_d3(32, 2, 1, 1, 2, 1, 1); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x1.addr0 = add0.x10_0_addr0; - add0.x10_0_read_data = x1.read_data; - x1.addr1 = add0.x10_0_addr1; - x2.addr0 = add0.x20_0_0_addr0; - add0.x20_0_0_read_data = x2.read_data; - x2.addr1 = add0.x20_0_0_addr1; - x2.addr2 = add0.x20_0_0_addr2; - x3.addr0 = add0.x30_0_0_addr0; - x3.addr1 = add0.x30_0_0_addr1; - x3.addr2 = add0.x30_0_0_addr2; - x3.write_data = add0.x30_0_0_write_data; - x3.write_en = add0.x30_0_0_write_en; - add0.x30_0_0_done = x3.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x1.addr0 = comp_add0.x10_0_addr0; + comp_add0.x10_0_read_data = x1.read_data; + x1.addr1 = comp_add0.x10_0_addr1; + x2.addr0 = comp_add0.x20_0_0_addr0; + comp_add0.x20_0_0_read_data = x2.read_data; + x2.addr1 = comp_add0.x20_0_0_addr1; + x2.addr2 = comp_add0.x20_0_0_addr2; + x3.addr0 = comp_add0.x30_0_0_addr0; + x3.addr1 = comp_add0.x30_0_0_addr1; + x3.addr2 = comp_add0.x30_0_0_addr2; + x3.write_data = comp_add0.x30_0_0_write_data; + x3.write_en = comp_add0.x30_0_0_write_en; + comp_add0.x30_0_0_done = x3.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/conv2d.expect b/frontends/relay-futil/tests/conv2d.expect new file mode 100644 index 0000000000..aa5ca04744 --- /dev/null +++ b/frontends/relay-futil/tests/conv2d.expect @@ -0,0 +1,395 @@ +import "primitives/std.lib"; + +component conv2d0(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, weight0_0_0_0_read_data: 32, weight0_0_0_0_done: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 3, data0_0_0_0_addr1: 10, data0_0_0_0_addr2: 4, data0_0_0_0_addr3: 4, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, weight0_0_0_0_addr0: 10, weight0_0_0_0_addr1: 10, weight0_0_0_0_addr2: 2, weight0_0_0_0_addr3: 2, weight0_0_0_0_write_data: 32, weight0_0_0_0_write_en: 1, weight0_0_0_0_clk: 1, x0_0_0_0_addr0: 3, x0_0_0_0_addr1: 10, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 4, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(32); + add2 = prim fixed_p_std_add(32, 16, 16); + add3 = prim std_add(32); + add4 = prim std_add(32); + add5 = prim std_add(32); + add6 = prim std_add(32); + add7 = prim std_add(32); + add8 = prim std_add(32); + add9 = prim std_add(32); + b0 = prim std_reg(32); + bin_read0_0 = prim std_reg(32); + bin_read1_0 = prim std_reg(32); + bin_read2_0 = prim std_reg(32); + c0 = prim std_reg(32); + const0 = prim std_const(32, 0); + const1 = prim std_const(32, 4); + const10 = prim std_const(32, 0); + const11 = prim std_const(32, 2); + const12 = prim std_const(32, 0); + const13 = prim std_const(32, 2); + const14 = prim std_const(32, 1); + const15 = prim std_const(32, 1); + const16 = prim std_const(32, 1); + const17 = prim std_const(32, 1); + const18 = prim std_const(32, 1); + const19 = prim std_const(32, 1); + const2 = prim std_const(32, 0); + const20 = prim std_const(32, 1); + const21 = prim std_const(32, 1); + const22 = prim std_const(32, 1); + const3 = prim std_const(32, 511); + const4 = prim std_const(32, 0); + const5 = prim std_const(32, 13); + const6 = prim std_const(32, 0); + const7 = prim std_const(32, 13); + const8 = prim std_const(32, 0); + const9 = prim std_const(32, 511); + data_read0_0 = prim std_reg(32); + dx0 = prim std_reg(32); + dy0 = prim std_reg(32); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + k0 = prim std_reg(32); + kernel_x_0 = prim std_reg(32); + kernel_y_0 = prim std_reg(32); + le0 = prim std_le(32); + le1 = prim std_le(32); + le2 = prim std_le(32); + le3 = prim std_le(32); + le4 = prim std_le(32); + le5 = prim std_le(32); + le6 = prim std_le(32); + mult_pipe0 = prim std_mult_pipe(32); + mult_pipe1 = prim std_mult_pipe(32); + mult_pipe2 = prim std_mult_pipe(32); + slice0 = prim std_slice(32, 3); + slice1 = prim std_slice(32, 10); + slice10 = prim std_slice(32, 10); + slice11 = prim std_slice(32, 4); + slice12 = prim std_slice(32, 4); + slice2 = prim std_slice(32, 4); + slice3 = prim std_slice(32, 4); + slice4 = prim std_slice(32, 10); + slice5 = prim std_slice(32, 10); + slice6 = prim std_slice(32, 2); + slice7 = prim std_slice(32, 2); + slice8 = prim std_slice(32, 32); + slice9 = prim std_slice(32, 3); + sum_0 = prim std_reg(32); + weight_read0_0 = prim std_reg(32); + x0 = prim std_reg(32); + y0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = b0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = c0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = y0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = x0.out; + le3.right = const7.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = k0.out; + le4.right = const9.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = dy0.out; + le5.right = const11.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + le6.left = dx0.out; + le6.right = const13.out; + } + group let0<"static"=1> { + b0.in = const0.out; + b0.write_en = 1'd1; + let0[done] = b0.done; + } + group let1<"static"=1> { + c0.in = const2.out; + c0.write_en = 1'd1; + let1[done] = c0.done; + } + group let10<"static"=4> { + bin_read1_0.in = mult_pipe1.out; + bin_read1_0.write_en = mult_pipe1.done; + let10[done] = bin_read1_0.done; + mult_pipe1.left = const15.out; + mult_pipe1.right = x0.out; + mult_pipe1.go = !mult_pipe1.done ? 1'd1; + } + group let11<"static"=1> { + kernel_x_0.in = add1.out; + kernel_x_0.write_en = 1'd1; + let11[done] = kernel_x_0.done; + add1.left = bin_read1_0.out; + add1.right = dx0.out; + } + group let12<"static"=1> { + bin_read2_0.in = slice8.out; + bin_read2_0.write_en = 1'd1; + let12[done] = bin_read2_0.done; + slice8.in = mult_pipe2.out; + mult_pipe2.left = data_read0_0.out; + mult_pipe2.right = weight_read0_0.out; + mult_pipe2.go = !mult_pipe2.done ? 1'd1; + } + group let2<"static"=1> { + y0.in = const4.out; + y0.write_en = 1'd1; + let2[done] = y0.done; + } + group let3<"static"=1> { + x0.in = const6.out; + x0.write_en = 1'd1; + let3[done] = x0.done; + } + group let4<"static"=1> { + sum_0.in = fpconst0.out; + sum_0.write_en = 1'd1; + let4[done] = sum_0.done; + } + group let5<"static"=1> { + k0.in = const8.out; + k0.write_en = 1'd1; + let5[done] = k0.done; + } + group let6<"static"=1> { + dy0.in = const10.out; + dy0.write_en = 1'd1; + let6[done] = dy0.done; + } + group let7<"static"=1> { + dx0.in = const12.out; + dx0.write_en = 1'd1; + let7[done] = dx0.done; + } + group let8<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let8[done] = bin_read0_0.done; + mult_pipe0.left = const14.out; + mult_pipe0.right = y0.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let9<"static"=1> { + kernel_y_0.in = add0.out; + kernel_y_0.write_en = 1'd1; + let9[done] = kernel_y_0.done; + add0.left = bin_read0_0.out; + add0.right = dy0.out; + } + group upd0<"static"=1> { + data_read0_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice3.out; + slice3.in = kernel_x_0.out; + data0_0_0_0_addr2 = slice2.out; + slice2.in = kernel_y_0.out; + data0_0_0_0_addr1 = slice1.out; + slice1.in = k0.out; + data0_0_0_0_addr0 = slice0.out; + slice0.in = b0.out; + data_read0_0.in = 1'd1 ? data0_0_0_0_read_data; + upd0[done] = data_read0_0.done ? 1'd1; + } + group upd1<"static"=1> { + weight_read0_0.write_en = 1'd1; + weight0_0_0_0_addr3 = slice7.out; + slice7.in = dx0.out; + weight0_0_0_0_addr2 = slice6.out; + slice6.in = dy0.out; + weight0_0_0_0_addr1 = slice5.out; + slice5.in = k0.out; + weight0_0_0_0_addr0 = slice4.out; + slice4.in = c0.out; + weight_read0_0.in = 1'd1 ? weight0_0_0_0_read_data; + upd1[done] = weight_read0_0.done ? 1'd1; + } + group upd10<"static"=1> { + b0.write_en = 1'd1; + add9.left = b0.out; + add9.right = const22.out; + b0.in = 1'd1 ? add9.out; + upd10[done] = b0.done ? 1'd1; + } + group upd2<"static"=1> { + sum_0.write_en = 1'd1; + add2.left = sum_0.out; + add2.right = bin_read2_0.out; + sum_0.in = 1'd1 ? add2.out; + upd2[done] = sum_0.done ? 1'd1; + } + group upd3<"static"=1> { + dx0.write_en = 1'd1; + add3.left = dx0.out; + add3.right = const16.out; + dx0.in = 1'd1 ? add3.out; + upd3[done] = dx0.done ? 1'd1; + } + group upd4<"static"=1> { + dy0.write_en = 1'd1; + add4.left = dy0.out; + add4.right = const17.out; + dy0.in = 1'd1 ? add4.out; + upd4[done] = dy0.done ? 1'd1; + } + group upd5<"static"=1> { + k0.write_en = 1'd1; + add5.left = k0.out; + add5.right = const18.out; + k0.in = 1'd1 ? add5.out; + upd5[done] = k0.done ? 1'd1; + } + group upd6<"static"=1> { + x0_0_0_0_addr3 = slice12.out; + slice12.in = x0.out; + x0_0_0_0_addr2 = slice11.out; + slice11.in = y0.out; + x0_0_0_0_addr1 = slice10.out; + slice10.in = c0.out; + x0_0_0_0_addr0 = slice9.out; + slice9.in = b0.out; + x0_0_0_0_write_en = 1'd1; + x0_0_0_0_write_data = 1'd1 ? sum_0.out; + upd6[done] = x0_0_0_0_done ? 1'd1; + } + group upd7<"static"=1> { + x0.write_en = 1'd1; + add6.left = x0.out; + add6.right = const19.out; + x0.in = 1'd1 ? add6.out; + upd7[done] = x0.done ? 1'd1; + } + group upd8<"static"=1> { + y0.write_en = 1'd1; + add7.left = y0.out; + add7.right = const20.out; + y0.in = 1'd1 ? add7.out; + upd8[done] = y0.done ? 1'd1; + } + group upd9<"static"=1> { + c0.write_en = 1'd1; + add8.left = c0.out; + add8.right = const21.out; + c0.in = 1'd1 ? add8.out; + upd9[done] = c0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + par { + let4; + seq { + let5; + while le4.out with cond4 { + seq { + let6; + while le5.out with cond5 { + seq { + let7; + while le6.out with cond6 { + seq { + par { + seq { + let8; + let9; + } + seq { + let10; + let11; + } + } + par { + upd0; + upd1; + } + let12; + upd2; + upd3; + } + } + upd4; + } + } + upd5; + } + } + } + } + upd6; + upd7; + } + } + upd8; + } + } + upd9; + } + } + upd10; + } + } + } + } +} + +component main () -> () { + cells { + x = prim std_mem_d4(32, 5, 512, 14, 14, 3, 10, 4, 4); + data = prim std_mem_d4(32, 5, 512, 14, 14, 3, 10, 4, 4); + weight = prim std_mem_d4(32, 512, 512, 3, 3, 10, 10, 2, 2); + comp_conv2d0 = conv2d0; + } + wires { + group run_conv2d0 { + data.addr0 = comp_conv2d0.data0_0_0_0_addr0; + comp_conv2d0.data0_0_0_0_read_data = data.read_data; + data.addr1 = comp_conv2d0.data0_0_0_0_addr1; + data.addr2 = comp_conv2d0.data0_0_0_0_addr2; + data.addr3 = comp_conv2d0.data0_0_0_0_addr3; + weight.addr0 = comp_conv2d0.weight0_0_0_0_addr0; + comp_conv2d0.weight0_0_0_0_read_data = weight.read_data; + weight.addr1 = comp_conv2d0.weight0_0_0_0_addr1; + weight.addr2 = comp_conv2d0.weight0_0_0_0_addr2; + weight.addr3 = comp_conv2d0.weight0_0_0_0_addr3; + x.addr0 = comp_conv2d0.x0_0_0_0_addr0; + x.addr1 = comp_conv2d0.x0_0_0_0_addr1; + x.addr2 = comp_conv2d0.x0_0_0_0_addr2; + x.addr3 = comp_conv2d0.x0_0_0_0_addr3; + x.write_data = comp_conv2d0.x0_0_0_0_write_data; + x.write_en = comp_conv2d0.x0_0_0_0_write_en; + comp_conv2d0.x0_0_0_0_done = x.done; + comp_conv2d0.go = 1'd1; + run_conv2d0[done] = comp_conv2d0.done ? 1'd1; + } + } + control { + seq { + run_conv2d0; + } + } +} diff --git a/frontends/relay-futil/tests/conv2d.relay b/frontends/relay-futil/tests/conv2d.relay new file mode 100644 index 0000000000..e759bab61a --- /dev/null +++ b/frontends/relay-futil/tests/conv2d.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%data: Tensor[(5, 512, 14, 14), float32], %weight: Tensor[(512, 512, 3, 3), float32]) -> Tensor[(5, 512, 14, 14), float32] { + let %x: Tensor[(5, 512, 14, 14), float32] = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=512, kernel_size=[3, 3]) /* ty=Tensor[(5, 512, 14, 14), float32] */; + %x +} + diff --git a/frontends/relay-futil/tests/data/conv2d.expect b/frontends/relay-futil/tests/data/conv2d.expect new file mode 100644 index 0000000000..2f8cb5e0be --- /dev/null +++ b/frontends/relay-futil/tests/data/conv2d.expect @@ -0,0 +1,120 @@ +{ + "data": [ + [ + [ + [ + 1, + 1 + ], + [ + 4, + 1 + ] + ], + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ] + ], + [ + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ], + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ] + ] + ], + "weight": [ + [ + [ + [ + 2, + 1 + ], + [ + 1, + 1 + ] + ], + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ] + ], + [ + [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ], + [ + [ + 1, + 3 + ], + [ + 1, + 4 + ] + ] + ] + ], + "x": [ + [ + [ + [ + 12 + ] + ], + [ + [ + 16 + ] + ] + ], + [ + [ + [ + 9 + ] + ], + [ + [ + 13 + ] + ] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/conv2d.relay b/frontends/relay-futil/tests/data/conv2d.relay new file mode 100644 index 0000000000..168e53e418 --- /dev/null +++ b/frontends/relay-futil/tests/data/conv2d.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%data: Tensor[(2, 2, 2, 2), int32], %weight: Tensor[(2, 2, 2, 2), int32]) { + let %x = nn.conv2d(%data, %weight, channels=2, kernel_size=[2,2]); + %x +} diff --git a/frontends/relay-futil/tests/data/conv2d.relay.data b/frontends/relay-futil/tests/data/conv2d.relay.data new file mode 100644 index 0000000000..81591e0997 --- /dev/null +++ b/frontends/relay-futil/tests/data/conv2d.relay.data @@ -0,0 +1,14 @@ +{ + "data": { + "data": [ [[[1,1], [4,1]], [[1,1], [1,1]]], [[[1,1], [1,1]], [[1,1], [1,1]]] ], + "bitwidth": 32 + }, + "weight": { + "data": [ [[[2,1], [1,1]], [[1,1], [1,1]]], [[[1,1], [1,1]], [[1,3], [1,4]]] ], + "bitwidth": 32 + }, + "x": { + "data": [ [[[0]], [[0]]], [[[0]], [[0]]] ], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/max_pool2d.expect b/frontends/relay-futil/tests/data/max_pool2d.expect new file mode 100644 index 0000000000..2e4f5739ae --- /dev/null +++ b/frontends/relay-futil/tests/data/max_pool2d.expect @@ -0,0 +1,158 @@ +{ + "data": [ + [ + [ + [ + 10, + 20, + 100, + 101 + ], + [ + 30, + 40, + 102, + 103 + ], + [ + 20, + 30, + 100, + 103 + ], + [ + 10, + 40, + 103, + 100 + ] + ], + [ + [ + 20, + 0, + 70, + 25 + ], + [ + 1, + 2, + 13, + 4 + ], + [ + 1, + 2, + 5, + 6 + ], + [ + 3, + 4, + 7, + 8 + ] + ] + ], + [ + [ + [ + 11, + 21, + 109, + 10 + ], + [ + 31, + 41, + 0, + 14 + ], + [ + 19, + 42, + 1, + 103 + ], + [ + 1, + 18, + 10, + 101 + ] + ], + [ + [ + 1, + 2, + 4, + 3 + ], + [ + 3, + 4, + 2, + 1 + ], + [ + 4, + 2, + 2, + 4 + ], + [ + 1, + 3, + 3, + 1 + ] + ] + ] + ], + "result": [ + [ + [ + [ + 40, + 103 + ], + [ + 40, + 103 + ] + ], + [ + [ + 20, + 70 + ], + [ + 4, + 8 + ] + ] + ], + [ + [ + [ + 41, + 109 + ], + [ + 42, + 103 + ] + ], + [ + [ + 4, + 4 + ], + [ + 4, + 4 + ] + ] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/max_pool2d.relay b/frontends/relay-futil/tests/data/max_pool2d.relay new file mode 100644 index 0000000000..e1ba79d351 --- /dev/null +++ b/frontends/relay-futil/tests/data/max_pool2d.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%data: Tensor[(2, 2, 4, 4), int32]) { + let %result: Tensor[(2, 2, 2, 2), int32] = nn.max_pool2d(%data, pool_size=[2, 2], strides=[2, 2], padding=[0,0,0,0]); + %result +} + diff --git a/frontends/relay-futil/tests/data/max_pool2d.relay.data b/frontends/relay-futil/tests/data/max_pool2d.relay.data new file mode 100644 index 0000000000..517b34c9cc --- /dev/null +++ b/frontends/relay-futil/tests/data/max_pool2d.relay.data @@ -0,0 +1,43 @@ +{ + "data": { + "data": [ + [ + [ + [10,20, 100,101], + [30,40, 102,103], + + [20,30, 100,103], + [10,40, 103,100] + ], + [ + [20,0, 70,25], + [1, 2, 13,4], + + [1,2, 5,6], + [3,4, 7,8] + ] + ], + [ + [ + [11,21, 109,10], + [31,41, 0,14], + + [19,42, 1,103], + [1,18, 10,101] + ], + [ + [1,2, 4,3], + [3,4, 2,1], + + [4,2, 2,4], + [1,3, 3,1] + ] + ] + ], + "bitwidth": 32 + }, + "result": { + "data": [ [[[0,0], [0,0]], [[0,0], [0,0]]], [[[0,0], [0,0]], [[0,0], [0,0]]] ], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/softmax.expect b/frontends/relay-futil/tests/data/softmax.expect new file mode 100644 index 0000000000..1073dc7c6c --- /dev/null +++ b/frontends/relay-futil/tests/data/softmax.expect @@ -0,0 +1,14 @@ +{ + "x": [ + [ + 4, + 16 + ] + ], + "x1": [ + [ + 0, + 0 + ] + ] +} diff --git a/frontends/relay-futil/tests/data/softmax.relay b/frontends/relay-futil/tests/data/softmax.relay new file mode 100644 index 0000000000..858ae52126 --- /dev/null +++ b/frontends/relay-futil/tests/data/softmax.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(1, 2), float32]) { + let %x1: Tensor[(1, 2), float32] = nn.softmax(%x); + %x1 +} + diff --git a/frontends/relay-futil/tests/data/softmax.relay.data b/frontends/relay-futil/tests/data/softmax.relay.data new file mode 100644 index 0000000000..f0d81e4e55 --- /dev/null +++ b/frontends/relay-futil/tests/data/softmax.relay.data @@ -0,0 +1,10 @@ +{ + "x": { + "data": [[4, 16]], + "bitwidth": 32 + }, + "x1": { + "data": [[0, 0]], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.expect b/frontends/relay-futil/tests/data/tensor4d_multiply.expect new file mode 100644 index 0000000000..bd548739d2 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor4d_multiply.expect @@ -0,0 +1,344 @@ +{ + "x": [ + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ] + ], + "x1": [ + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ], + [ + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ], + [ + 1, + 2, + 3, + 4 + ] + ] + ] + ], + "x2": [ + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ], + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ], + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ], + [ + [ + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ], + [ + 1, + 4, + 9, + 16 + ] + ] + ] + ] +} diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.relay b/frontends/relay-futil/tests/data/tensor4d_multiply.relay new file mode 100644 index 0000000000..197d3c9564 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor4d_multiply.relay @@ -0,0 +1,5 @@ +v0.0.4 +fn (%x: Tensor[(2, 2, 4, 4), int32], %x1: Tensor[(2, 2, 4, 4), int32]) { + let %x2: Tensor[(2, 2, 4, 4), int32] = multiply(%x, %x1); + %x2 +} diff --git a/frontends/relay-futil/tests/data/tensor4d_multiply.relay.data b/frontends/relay-futil/tests/data/tensor4d_multiply.relay.data new file mode 100644 index 0000000000..6cdaa8c7a7 --- /dev/null +++ b/frontends/relay-futil/tests/data/tensor4d_multiply.relay.data @@ -0,0 +1,23 @@ +{ + "x": { + "data": [ + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]] + ], + "bitwidth": 32 + }, + "x1": { + "data": [ + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], + [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]], [[[1,2,3,4], [1,2,3,4], [1,2,3,4], [1,2,3,4]]] + ], + "bitwidth": 32 + }, + "x2": { + "data": [ + [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]], [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]], + [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]], [[[0,0,0,0], [0,0,0,0], [0,0,0,0], [0,0,0,0]]] + ], + "bitwidth": 32 + } +} \ No newline at end of file diff --git a/frontends/relay-futil/tests/dense.expect b/frontends/relay-futil/tests/dense.expect index a0d5ead2b1..9ca0f57adb 100644 --- a/frontends/relay-futil/tests/dense.expect +++ b/frontends/relay-futil/tests/dense.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component dense(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 13, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 4, y0_0_addr1: 13, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { +component dense0(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 13, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 4, y0_0_addr1: 13, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { cells { add0 = prim std_add(13); add1 = prim std_add(4); @@ -307,28 +307,28 @@ component main () -> () { x1 = prim std_mem_d2(32, 1, 10, 1, 4); x = prim std_mem_d2(32, 1, 4096, 1, 13); y = prim std_mem_d2(32, 10, 4096, 4, 13); - dense0 = dense; + comp_dense0 = dense0; } wires { - group run_dense { - x.addr0 = dense0.x0_0_addr0; - dense0.x0_0_read_data = x.read_data; - x.addr1 = dense0.x0_0_addr1; - y.addr0 = dense0.y0_0_addr0; - dense0.y0_0_read_data = y.read_data; - y.addr1 = dense0.y0_0_addr1; - x1.addr0 = dense0.x10_0_addr0; - x1.addr1 = dense0.x10_0_addr1; - x1.write_data = dense0.x10_0_write_data; - x1.write_en = dense0.x10_0_write_en; - dense0.x10_0_done = x1.done; - dense0.go = 1'd1; - run_dense[done] = dense0.done ? 1'd1; + group run_dense0 { + x.addr0 = comp_dense0.x0_0_addr0; + comp_dense0.x0_0_read_data = x.read_data; + x.addr1 = comp_dense0.x0_0_addr1; + y.addr0 = comp_dense0.y0_0_addr0; + comp_dense0.y0_0_read_data = y.read_data; + y.addr1 = comp_dense0.y0_0_addr1; + x1.addr0 = comp_dense0.x10_0_addr0; + x1.addr1 = comp_dense0.x10_0_addr1; + x1.write_data = comp_dense0.x10_0_write_data; + x1.write_en = comp_dense0.x10_0_write_en; + comp_dense0.x10_0_done = x1.done; + comp_dense0.go = 1'd1; + run_dense0[done] = comp_dense0.done ? 1'd1; } } control { seq { - run_dense; + run_dense0; } } } diff --git a/frontends/relay-futil/tests/fixed_point_add.expect b/frontends/relay-futil/tests/fixed_point_add.expect index aa8240b4cf..9c4910177e 100644 --- a/frontends/relay-futil/tests/fixed_point_add.expect +++ b/frontends/relay-futil/tests/fixed_point_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component add0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim fixed_p_std_add(32, 16, 16); add1 = prim std_add(1); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x.addr0 = add0.x0_addr0; - add0.x0_read_data = x.read_data; - y.addr0 = add0.y0_addr0; - add0.y0_read_data = y.read_data; - z.addr0 = add0.z0_addr0; - z.write_data = add0.z0_write_data; - z.write_en = add0.z0_write_en; - add0.z0_done = z.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x.addr0 = comp_add0.x0_addr0; + comp_add0.x0_read_data = x.read_data; + y.addr0 = comp_add0.y0_addr0; + comp_add0.y0_read_data = y.read_data; + z.addr0 = comp_add0.z0_addr0; + z.write_data = comp_add0.z0_write_data; + z.write_en = comp_add0.z0_write_en; + comp_add0.z0_done = z.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/let1.expect b/frontends/relay-futil/tests/let1.expect index cf228003ae..e59cbebcd9 100644 --- a/frontends/relay-futil/tests/let1.expect +++ b/frontends/relay-futil/tests/let1.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component multiply0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -82,25 +82,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); b = prim std_mem_d1(32, 1, 1); - multiply0 = multiply; + comp_multiply0 = multiply0; } wires { - group run_multiply { - a.addr0 = multiply0.a0_addr0; - multiply0.a0_read_data = a.read_data; - b.addr0 = multiply0.b0_addr0; - multiply0.b0_read_data = b.read_data; - z.addr0 = multiply0.z0_addr0; - z.write_data = multiply0.z0_write_data; - z.write_en = multiply0.z0_write_en; - multiply0.z0_done = z.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 1'd1; + group run_multiply0 { + a.addr0 = comp_multiply0.a0_addr0; + comp_multiply0.a0_read_data = a.read_data; + b.addr0 = comp_multiply0.b0_addr0; + comp_multiply0.b0_read_data = b.read_data; + z.addr0 = comp_multiply0.z0_addr0; + z.write_data = comp_multiply0.z0_write_data; + z.write_en = comp_multiply0.z0_write_en; + comp_multiply0.z0_done = z.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; } } control { seq { - run_multiply; + run_multiply0; } } } diff --git a/frontends/relay-futil/tests/let2.expect b/frontends/relay-futil/tests/let2.expect index b9a9bfd9ec..451a17e8df 100644 --- a/frontends/relay-futil/tests/let2.expect +++ b/frontends/relay-futil/tests/let2.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { +component add0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(32); @@ -68,7 +68,8 @@ component add(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_ } } } -component multiply(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { + +component multiply0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -150,40 +151,40 @@ component main () -> () { d = prim std_mem_d1(32, 1, 1); c = prim std_mem_d1(32, 1, 1); a = prim std_mem_d1(32, 1, 1); - add0 = add; + comp_add0 = add0; b = prim std_mem_d1(32, 1, 1); - multiply0 = multiply; + comp_multiply0 = multiply0; } wires { - group run_multiply { - a.addr0 = multiply0.a0_addr0; - multiply0.a0_read_data = a.read_data; - b.addr0 = multiply0.b0_addr0; - multiply0.b0_read_data = b.read_data; - c.addr0 = multiply0.c0_addr0; - c.write_data = multiply0.c0_write_data; - c.write_en = multiply0.c0_write_en; - multiply0.c0_done = c.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 1'd1; - } - group run_add { - c.addr0 = add0.c0_addr0; - add0.c0_read_data = c.read_data; - a.addr0 = add0.a0_addr0; - add0.a0_read_data = a.read_data; - d.addr0 = add0.d0_addr0; - d.write_data = add0.d0_write_data; - d.write_en = add0.d0_write_en; - add0.d0_done = d.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_multiply0 { + a.addr0 = comp_multiply0.a0_addr0; + comp_multiply0.a0_read_data = a.read_data; + b.addr0 = comp_multiply0.b0_addr0; + comp_multiply0.b0_read_data = b.read_data; + c.addr0 = comp_multiply0.c0_addr0; + c.write_data = comp_multiply0.c0_write_data; + c.write_en = comp_multiply0.c0_write_en; + comp_multiply0.c0_done = c.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; + } + group run_add0 { + c.addr0 = comp_add0.c0_addr0; + comp_add0.c0_read_data = c.read_data; + a.addr0 = comp_add0.a0_addr0; + comp_add0.a0_read_data = a.read_data; + d.addr0 = comp_add0.d0_addr0; + d.write_data = comp_add0.d0_write_data; + d.write_en = comp_add0.d0_write_en; + comp_add0.d0_done = d.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_multiply; - run_add; + run_multiply0; + run_add0; } } } diff --git a/frontends/relay-futil/tests/let3.expect b/frontends/relay-futil/tests/let3.expect index 11b79b4180..222268b304 100644 --- a/frontends/relay-futil/tests/let3.expect +++ b/frontends/relay-futil/tests/let3.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { +component multiply0(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1, e0_read_data: 32, e0_done: 1) -> (done: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1, e0_addr0: 1, e0_write_data: 32, e0_write_en: 1, e0_clk: 1) { cells { add0 = prim std_add(1); bin_read0_0 = prim std_reg(32); @@ -76,18 +76,18 @@ component multiply(go: 1, clk: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32 } } } -component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { + +component subtract1(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, c0_done: 1, d0_read_data: 32, d0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1, d0_addr0: 1, d0_write_data: 32, d0_write_en: 1, d0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); - bin_read0_0 = prim std_reg(32); c_read0_0 = prim std_reg(32); const0 = prim std_const(1, 0); const1 = prim std_const(1, 0); const2 = prim std_const(1, 1); - div_pipe0 = prim std_div_pipe(32); i0 = prim std_reg(1); le0 = prim std_le(1); + sub0 = prim std_sub(32); } wires { group cond0<"static"=0> { @@ -100,14 +100,6 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, i0.write_en = 1'd1; let0[done] = i0.done; } - group let1<> { - bin_read0_0.in = div_pipe0.out; - bin_read0_0.write_en = div_pipe0.done; - let1[done] = bin_read0_0.done; - div_pipe0.left = c_read0_0.out; - div_pipe0.right = a_read0_0.out; - div_pipe0.go = !div_pipe0.done ? 1'd1; - } group upd0<"static"=1> { c_read0_0.write_en = 1'd1; c0_addr0 = i0.out; @@ -123,7 +115,9 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, group upd2<"static"=1> { d0_addr0 = i0.out; d0_write_en = 1'd1; - d0_write_data = 1'd1 ? bin_read0_0.out; + sub0.left = c_read0_0.out; + sub0.right = a_read0_0.out; + d0_write_data = 1'd1 ? sub0.out; upd2[done] = d0_done ? 1'd1; } group upd3<"static"=1> { @@ -144,7 +138,6 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, upd0; upd1; } - let1; upd2; upd3; } @@ -152,7 +145,8 @@ component divide(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, c0_read_data: 32, } } } -component subtract(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { + +component subtract0(go: 1, clk: 1, a0_read_data: 32, a0_done: 1, b0_read_data: 32, b0_done: 1, c0_read_data: 32, c0_done: 1) -> (done: 1, a0_addr0: 1, a0_write_data: 32, a0_write_en: 1, a0_clk: 1, b0_addr0: 1, b0_write_data: 32, b0_write_en: 1, b0_clk: 1, c0_addr0: 1, c0_write_data: 32, c0_write_en: 1, c0_clk: 1) { cells { a_read0_0 = prim std_reg(32); add0 = prim std_add(1); @@ -226,55 +220,55 @@ component main () -> () { e = prim std_mem_d1(32, 1, 1); c = prim std_mem_d1(32, 1, 1); d = prim std_mem_d1(32, 1, 1); - multiply0 = multiply; + comp_multiply0 = multiply0; a = prim std_mem_d1(32, 1, 1); - divide0 = divide; + comp_subtract1 = subtract1; b = prim std_mem_d1(32, 1, 1); - subtract0 = subtract; + comp_subtract0 = subtract0; } wires { - group run_subtract { - a.addr0 = subtract0.a0_addr0; - subtract0.a0_read_data = a.read_data; - b.addr0 = subtract0.b0_addr0; - subtract0.b0_read_data = b.read_data; - c.addr0 = subtract0.c0_addr0; - c.write_data = subtract0.c0_write_data; - c.write_en = subtract0.c0_write_en; - subtract0.c0_done = c.done; - subtract0.go = 1'd1; - run_subtract[done] = subtract0.done ? 1'd1; + group run_subtract0 { + a.addr0 = comp_subtract0.a0_addr0; + comp_subtract0.a0_read_data = a.read_data; + b.addr0 = comp_subtract0.b0_addr0; + comp_subtract0.b0_read_data = b.read_data; + c.addr0 = comp_subtract0.c0_addr0; + c.write_data = comp_subtract0.c0_write_data; + c.write_en = comp_subtract0.c0_write_en; + comp_subtract0.c0_done = c.done; + comp_subtract0.go = 1'd1; + run_subtract0[done] = comp_subtract0.done ? 1'd1; } - group run_divide { - c.addr0 = divide0.c0_addr0; - divide0.c0_read_data = c.read_data; - a.addr0 = divide0.a0_addr0; - divide0.a0_read_data = a.read_data; - d.addr0 = divide0.d0_addr0; - d.write_data = divide0.d0_write_data; - d.write_en = divide0.d0_write_en; - divide0.d0_done = d.done; - divide0.go = 1'd1; - run_divide[done] = divide0.done ? 1'd1; + group run_subtract1 { + c.addr0 = comp_subtract1.c0_addr0; + comp_subtract1.c0_read_data = c.read_data; + a.addr0 = comp_subtract1.a0_addr0; + comp_subtract1.a0_read_data = a.read_data; + d.addr0 = comp_subtract1.d0_addr0; + d.write_data = comp_subtract1.d0_write_data; + d.write_en = comp_subtract1.d0_write_en; + comp_subtract1.d0_done = d.done; + comp_subtract1.go = 1'd1; + run_subtract1[done] = comp_subtract1.done ? 1'd1; } - group run_multiply { - c.addr0 = multiply0.c0_addr0; - multiply0.c0_read_data = c.read_data; - d.addr0 = multiply0.d0_addr0; - multiply0.d0_read_data = d.read_data; - e.addr0 = multiply0.e0_addr0; - e.write_data = multiply0.e0_write_data; - e.write_en = multiply0.e0_write_en; - multiply0.e0_done = e.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 1'd1; + group run_multiply0 { + c.addr0 = comp_multiply0.c0_addr0; + comp_multiply0.c0_read_data = c.read_data; + d.addr0 = comp_multiply0.d0_addr0; + comp_multiply0.d0_read_data = d.read_data; + e.addr0 = comp_multiply0.e0_addr0; + e.write_data = comp_multiply0.e0_write_data; + e.write_en = comp_multiply0.e0_write_en; + comp_multiply0.e0_done = e.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; } } control { seq { - run_subtract; - run_divide; - run_multiply; + run_subtract0; + run_subtract1; + run_multiply0; } } } diff --git a/frontends/relay-futil/tests/let3.relay b/frontends/relay-futil/tests/let3.relay index 50aa9a8064..725e75ab94 100644 --- a/frontends/relay-futil/tests/let3.relay +++ b/frontends/relay-futil/tests/let3.relay @@ -1,7 +1,7 @@ v0.0.4 fn (%a: int32, %b: int32) { let %c = subtract(%a, %b); - let %d = divide(%c, %a); + let %d = subtract(%c, %a); let %e = multiply(%c, %d); %e } diff --git a/frontends/relay-futil/tests/max_pool2d.expect b/frontends/relay-futil/tests/max_pool2d.expect new file mode 100644 index 0000000000..ee3eb04a8b --- /dev/null +++ b/frontends/relay-futil/tests/max_pool2d.expect @@ -0,0 +1,354 @@ +import "primitives/std.lib"; + +component max_pool2d0(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, result0_0_0_0_read_data: 32, result0_0_0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 2, data0_0_0_0_addr1: 2, data0_0_0_0_addr2: 3, data0_0_0_0_addr3: 3, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, result0_0_0_0_addr0: 2, result0_0_0_0_addr1: 2, result0_0_0_0_addr2: 2, result0_0_0_0_addr3: 2, result0_0_0_0_write_data: 32, result0_0_0_0_write_en: 1, result0_0_0_0_clk: 1) { + cells { + add0 = prim std_add(32); + add1 = prim std_add(32); + add2 = prim std_add(32); + add3 = prim std_add(32); + add4 = prim std_add(32); + add5 = prim std_add(32); + add6 = prim std_add(32); + add7 = prim std_add(32); + b0 = prim std_reg(32); + bin_read0_0 = prim std_reg(32); + bin_read1_0 = prim std_reg(32); + c0 = prim std_reg(32); + const0 = prim std_const(32, 0); + const1 = prim std_const(32, 1); + const10 = prim std_const(32, 0); + const11 = prim std_const(32, 1); + const12 = prim std_const(32, 0); + const13 = prim std_const(32, 1); + const14 = prim std_const(32, 1); + const15 = prim std_const(32, 1); + const16 = prim std_const(32, 1); + const17 = prim std_const(32, 1); + const18 = prim std_const(32, 1); + const19 = prim std_const(32, 1); + const2 = prim std_const(32, 0); + const3 = prim std_const(32, 1); + const4 = prim std_const(32, 0); + const5 = prim std_const(32, 1); + const6 = prim std_const(32, 0); + const7 = prim std_const(32, 1); + const8 = prim std_const(32, 2); + const9 = prim std_const(32, 2); + current_0 = prim std_reg(32); + gt0 = prim std_gt(32); + le0 = prim std_le(32); + le1 = prim std_le(32); + le2 = prim std_le(32); + le3 = prim std_le(32); + le4 = prim std_le(32); + le5 = prim std_le(32); + m0 = prim std_reg(32); + max_0 = prim std_reg(32); + mult_pipe0 = prim std_mult_pipe(32); + mult_pipe1 = prim std_mult_pipe(32); + n0 = prim std_reg(32); + pool_x_0 = prim std_reg(32); + pool_y_0 = prim std_reg(32); + slice0 = prim std_slice(32, 2); + slice1 = prim std_slice(32, 2); + slice10 = prim std_slice(32, 2); + slice11 = prim std_slice(32, 2); + slice2 = prim std_slice(32, 3); + slice3 = prim std_slice(32, 3); + slice4 = prim std_slice(32, 2); + slice5 = prim std_slice(32, 2); + slice6 = prim std_slice(32, 3); + slice7 = prim std_slice(32, 3); + slice8 = prim std_slice(32, 2); + slice9 = prim std_slice(32, 2); + stride_x_0 = prim std_reg(32); + stride_y_0 = prim std_reg(32); + x0 = prim std_reg(32); + y0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = b0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = c0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = y0.out; + le2.right = const5.out; + } + group cond3<"static"=0> { + cond3[done] = 1'd1; + le3.left = x0.out; + le3.right = const7.out; + } + group cond4<"static"=0> { + cond4[done] = 1'd1; + le4.left = m0.out; + le4.right = const11.out; + } + group cond5<"static"=0> { + cond5[done] = 1'd1; + le5.left = n0.out; + le5.right = const13.out; + } + group cond6<"static"=0> { + cond6[done] = 1'd1; + gt0.left = current_0.out; + gt0.right = max_0.out; + } + group let0<"static"=1> { + b0.in = const0.out; + b0.write_en = 1'd1; + let0[done] = b0.done; + } + group let1<"static"=1> { + c0.in = const2.out; + c0.write_en = 1'd1; + let1[done] = c0.done; + } + group let10<"static"=1> { + pool_y_0.in = add0.out; + pool_y_0.write_en = 1'd1; + let10[done] = pool_y_0.done; + add0.left = stride_y_0.out; + add0.right = m0.out; + } + group let11<"static"=1> { + pool_x_0.in = add1.out; + pool_x_0.write_en = 1'd1; + let11[done] = pool_x_0.done; + add1.left = stride_x_0.out; + add1.right = n0.out; + } + group let2<"static"=1> { + y0.in = const4.out; + y0.write_en = 1'd1; + let2[done] = y0.done; + } + group let3<"static"=1> { + x0.in = const6.out; + x0.write_en = 1'd1; + let3[done] = x0.done; + } + group let4<"static"=4> { + bin_read0_0.in = mult_pipe0.out; + bin_read0_0.write_en = mult_pipe0.done; + let4[done] = bin_read0_0.done; + mult_pipe0.left = y0.out; + mult_pipe0.right = const8.out; + mult_pipe0.go = !mult_pipe0.done ? 1'd1; + } + group let5<"static"=1> { + stride_y_0.in = bin_read0_0.out; + stride_y_0.write_en = 1'd1; + let5[done] = stride_y_0.done; + } + group let6<"static"=4> { + bin_read1_0.in = mult_pipe1.out; + bin_read1_0.write_en = mult_pipe1.done; + let6[done] = bin_read1_0.done; + mult_pipe1.left = x0.out; + mult_pipe1.right = const9.out; + mult_pipe1.go = !mult_pipe1.done ? 1'd1; + } + group let7<"static"=1> { + stride_x_0.in = bin_read1_0.out; + stride_x_0.write_en = 1'd1; + let7[done] = stride_x_0.done; + } + group let8<"static"=1> { + m0.in = const10.out; + m0.write_en = 1'd1; + let8[done] = m0.done; + } + group let9<"static"=1> { + n0.in = const12.out; + n0.write_en = 1'd1; + let9[done] = n0.done; + } + group upd0<"static"=1> { + max_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice3.out; + slice3.in = stride_x_0.out; + data0_0_0_0_addr2 = slice2.out; + slice2.in = stride_y_0.out; + data0_0_0_0_addr1 = slice1.out; + slice1.in = c0.out; + data0_0_0_0_addr0 = slice0.out; + slice0.in = b0.out; + max_0.in = 1'd1 ? data0_0_0_0_read_data; + upd0[done] = max_0.done ? 1'd1; + } + group upd1<"static"=1> { + current_0.write_en = 1'd1; + data0_0_0_0_addr3 = slice7.out; + slice7.in = pool_x_0.out; + data0_0_0_0_addr2 = slice6.out; + slice6.in = pool_y_0.out; + data0_0_0_0_addr1 = slice5.out; + slice5.in = c0.out; + data0_0_0_0_addr0 = slice4.out; + slice4.in = b0.out; + current_0.in = 1'd1 ? data0_0_0_0_read_data; + upd1[done] = current_0.done ? 1'd1; + } + group upd2<"static"=1> { + max_0.write_en = 1'd1; + max_0.in = 1'd1 ? current_0.out; + upd2[done] = max_0.done ? 1'd1; + } + group upd3<"static"=1> { + n0.write_en = 1'd1; + add2.left = n0.out; + add2.right = const14.out; + n0.in = 1'd1 ? add2.out; + upd3[done] = n0.done ? 1'd1; + } + group upd4<"static"=1> { + m0.write_en = 1'd1; + add3.left = m0.out; + add3.right = const15.out; + m0.in = 1'd1 ? add3.out; + upd4[done] = m0.done ? 1'd1; + } + group upd5<"static"=1> { + result0_0_0_0_addr3 = slice11.out; + slice11.in = x0.out; + result0_0_0_0_addr2 = slice10.out; + slice10.in = y0.out; + result0_0_0_0_addr1 = slice9.out; + slice9.in = c0.out; + result0_0_0_0_addr0 = slice8.out; + slice8.in = b0.out; + result0_0_0_0_write_en = 1'd1; + result0_0_0_0_write_data = 1'd1 ? max_0.out; + upd5[done] = result0_0_0_0_done ? 1'd1; + } + group upd6<"static"=1> { + x0.write_en = 1'd1; + add4.left = x0.out; + add4.right = const16.out; + x0.in = 1'd1 ? add4.out; + upd6[done] = x0.done ? 1'd1; + } + group upd7<"static"=1> { + y0.write_en = 1'd1; + add5.left = y0.out; + add5.right = const17.out; + y0.in = 1'd1 ? add5.out; + upd7[done] = y0.done ? 1'd1; + } + group upd8<"static"=1> { + c0.write_en = 1'd1; + add6.left = c0.out; + add6.right = const18.out; + c0.in = 1'd1 ? add6.out; + upd8[done] = c0.done ? 1'd1; + } + group upd9<"static"=1> { + b0.write_en = 1'd1; + add7.left = b0.out; + add7.right = const19.out; + b0.in = 1'd1 ? add7.out; + upd9[done] = b0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + let1; + while le1.out with cond1 { + seq { + let2; + while le2.out with cond2 { + seq { + let3; + while le3.out with cond3 { + seq { + par { + seq { + let4; + let5; + } + seq { + let6; + let7; + } + } + upd0; + let8; + while le4.out with cond4 { + seq { + let9; + while le5.out with cond5 { + seq { + par { + let10; + let11; + } + upd1; + if gt0.out with cond6 { + upd2; + } + upd3; + } + } + upd4; + } + } + upd5; + upd6; + } + } + upd7; + } + } + upd8; + } + } + upd9; + } + } + } + } +} + +component main () -> () { + cells { + result = prim std_mem_d4(32, 2, 2, 2, 2, 2, 2, 2, 2); + data = prim std_mem_d4(32, 2, 2, 4, 4, 2, 2, 3, 3); + comp_max_pool2d0 = max_pool2d0; + } + wires { + group run_max_pool2d0 { + data.addr0 = comp_max_pool2d0.data0_0_0_0_addr0; + comp_max_pool2d0.data0_0_0_0_read_data = data.read_data; + data.addr1 = comp_max_pool2d0.data0_0_0_0_addr1; + data.addr2 = comp_max_pool2d0.data0_0_0_0_addr2; + data.addr3 = comp_max_pool2d0.data0_0_0_0_addr3; + result.addr0 = comp_max_pool2d0.result0_0_0_0_addr0; + result.addr1 = comp_max_pool2d0.result0_0_0_0_addr1; + result.addr2 = comp_max_pool2d0.result0_0_0_0_addr2; + result.addr3 = comp_max_pool2d0.result0_0_0_0_addr3; + result.write_data = comp_max_pool2d0.result0_0_0_0_write_data; + result.write_en = comp_max_pool2d0.result0_0_0_0_write_en; + comp_max_pool2d0.result0_0_0_0_done = result.done; + comp_max_pool2d0.go = 1'd1; + run_max_pool2d0[done] = comp_max_pool2d0.done ? 1'd1; + } + } + control { + seq { + run_max_pool2d0; + } + } +} diff --git a/frontends/relay-futil/tests/max_pool2d.relay b/frontends/relay-futil/tests/max_pool2d.relay new file mode 100644 index 0000000000..e1ba79d351 --- /dev/null +++ b/frontends/relay-futil/tests/max_pool2d.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%data: Tensor[(2, 2, 4, 4), int32]) { + let %result: Tensor[(2, 2, 2, 2), int32] = nn.max_pool2d(%data, pool_size=[2, 2], strides=[2, 2], padding=[0,0,0,0]); + %result +} + diff --git a/frontends/relay-futil/tests/mlp_net.expect b/frontends/relay-futil/tests/mlp_net.expect deleted file mode 100644 index 812a0381fd..0000000000 --- a/frontends/relay-futil/tests/mlp_net.expect +++ /dev/null @@ -1,1791 +0,0 @@ -import "primitives/std.lib"; - -component bias_add2(go: 1, clk: 1, fc3_bias0_read_data: 32, fc3_bias0_done: 1, x70_0_read_data: 32, x70_0_done: 1, x80_0_read_data: 32, x80_0_done: 1) -> (done: 1, fc3_bias0_addr0: 4, fc3_bias0_write_data: 32, fc3_bias0_write_en: 1, fc3_bias0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1, x80_0_addr0: 1, x80_0_addr1: 4, x80_0_write_data: 32, x80_0_write_en: 1, x80_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(4); - add2 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(4, 0); - const3 = prim std_const(4, 9); - const4 = prim std_const(4, 1); - const5 = prim std_const(1, 1); - fc3_bias_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(4); - le0 = prim std_le(1); - le1 = prim std_le(4); - x7_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group upd0<"static"=1> { - x7_read0_0.write_en = 1'd1; - x70_0_addr1 = j0.out; - x70_0_addr0 = i0.out; - x7_read0_0.in = 1'd1 ? x70_0_read_data; - upd0[done] = x7_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - fc3_bias_read0_0.write_en = 1'd1; - fc3_bias0_addr0 = j0.out; - fc3_bias_read0_0.in = 1'd1 ? fc3_bias0_read_data; - upd1[done] = fc3_bias_read0_0.done ? 1'd1; - } - group upd2<"static"=1> { - x80_0_addr1 = j0.out; - x80_0_addr0 = i0.out; - x80_0_write_en = 1'd1; - add0.left = x7_read0_0.out; - add0.right = fc3_bias_read0_0.out; - x80_0_write_data = 1'd1 ? add0.out; - upd2[done] = x80_0_done ? 1'd1; - } - group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; - } - group upd4<"static"=1> { - i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - par { - upd0; - upd1; - } - upd2; - upd3; - } - } - upd4; - } - } - } - } -} -component dense2(go: 1, clk: 1, fc3_weight0_0_read_data: 32, fc3_weight0_0_done: 1, x60_0_read_data: 32, x60_0_done: 1, x70_0_read_data: 32, x70_0_done: 1) -> (done: 1, fc3_weight0_0_addr0: 4, fc3_weight0_0_addr1: 7, fc3_weight0_0_write_data: 32, fc3_weight0_0_write_en: 1, fc3_weight0_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1, x70_0_addr0: 1, x70_0_addr1: 4, x70_0_write_data: 32, x70_0_write_en: 1, x70_0_clk: 1) { - cells { - add0 = prim std_add(7); - add1 = prim std_add(4); - add2 = prim fixed_p_std_add(32, 16, 16); - add3 = prim std_add(7); - add4 = prim std_add(4); - add5 = prim std_add(1); - add6 = prim std_add(4); - add7 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(4, 0); - const1 = prim std_const(4, 9); - const10 = prim std_const(7, 0); - const11 = prim std_const(7, 63); - const12 = prim std_const(7, 1); - const13 = prim std_const(4, 1); - const14 = prim std_const(1, 1); - const15 = prim std_const(1, 0); - const16 = prim std_const(1, 0); - const17 = prim std_const(4, 0); - const18 = prim std_const(4, 9); - const19 = prim std_const(4, 1); - const2 = prim std_const(7, 0); - const20 = prim std_const(1, 1); - const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(4, 1); - const6 = prim std_const(1, 0); - const7 = prim std_const(1, 0); - const8 = prim std_const(4, 0); - const9 = prim std_const(4, 9); - fc3_weight_read0_0 = prim std_reg(32); - i0 = prim std_reg(4); - i1 = prim std_reg(1); - i2 = prim std_reg(1); - j0 = prim std_reg(7); - j1 = prim std_reg(4); - j2 = prim std_reg(4); - k0 = prim std_reg(7); - le0 = prim std_le(4); - le1 = prim std_le(7); - le2 = prim std_le(1); - le3 = prim std_le(4); - le4 = prim std_le(7); - le5 = prim std_le(1); - le6 = prim std_le(4); - mult_pipe0 = prim std_mult_pipe(32); - product_0 = prim std_reg(32); - slice0 = prim std_slice(32, 32); - slice1 = prim std_slice(32, 32); - temporary_x70_0 = prim std_mem_d2(32, 1, 10, 1, 4); - temporary_x7_read0_0 = prim std_reg(32); - transpose_fc3_weight0_0 = prim std_mem_d2(32, 64, 10, 7, 4); - transpose_fc3_weight_read0_0 = prim std_reg(32); - x6_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = i1.out; - le2.right = const7.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = j1.out; - le3.right = const9.out; - } - group cond4<"static"=0> { - cond4[done] = 1'd1; - le4.left = k0.out; - le4.right = const11.out; - } - group cond5<"static"=0> { - cond5[done] = 1'd1; - le5.left = i2.out; - le5.right = const16.out; - } - group cond6<"static"=0> { - cond6[done] = 1'd1; - le6.left = j2.out; - le6.right = const18.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group let2<"static"=1> { - i1.in = const6.out; - i1.write_en = 1'd1; - let2[done] = i1.done; - } - group let3<"static"=1> { - j1.in = const8.out; - j1.write_en = 1'd1; - let3[done] = j1.done; - } - group let4<"static"=1> { - k0.in = const10.out; - k0.write_en = 1'd1; - let4[done] = k0.done; - } - group let5<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let5[done] = bin_read0_0.done; - slice0.in = mult_pipe0.out; - mult_pipe0.left = x6_read0_0.out; - mult_pipe0.right = transpose_fc3_weight_read0_0.out; - mult_pipe0.go = !mult_pipe0.done ? 1'd1; - } - group let6<"static"=1> { - product_0.in = slice1.out; - product_0.write_en = 1'd1; - let6[done] = product_0.done; - slice1.in = bin_read0_0.out; - } - group let7<"static"=1> { - i2.in = const15.out; - i2.write_en = 1'd1; - let7[done] = i2.done; - } - group let8<"static"=1> { - j2.in = const17.out; - j2.write_en = 1'd1; - let8[done] = j2.done; - } - group upd0<"static"=1> { - fc3_weight_read0_0.write_en = 1'd1; - fc3_weight0_0_addr1 = j0.out; - fc3_weight0_0_addr0 = i0.out; - fc3_weight_read0_0.in = 1'd1 ? fc3_weight0_0_read_data; - upd0[done] = fc3_weight_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - transpose_fc3_weight0_0.addr1 = i0.out; - transpose_fc3_weight0_0.addr0 = j0.out; - transpose_fc3_weight0_0.write_en = 1'd1; - transpose_fc3_weight0_0.write_data = 1'd1 ? fc3_weight_read0_0.out; - upd1[done] = transpose_fc3_weight0_0.done ? 1'd1; - } - group upd10<"static"=1> { - temporary_x7_read0_0.write_en = 1'd1; - temporary_x70_0.addr1 = j2.out; - temporary_x70_0.addr0 = i2.out; - temporary_x7_read0_0.in = 1'd1 ? temporary_x70_0.read_data; - upd10[done] = temporary_x7_read0_0.done ? 1'd1; - } - group upd11<"static"=1> { - x70_0_addr1 = j2.out; - x70_0_addr0 = i2.out; - x70_0_write_en = 1'd1; - x70_0_write_data = 1'd1 ? temporary_x7_read0_0.out; - upd11[done] = x70_0_done ? 1'd1; - } - group upd12<"static"=1> { - j2.write_en = 1'd1; - add6.left = j2.out; - add6.right = const19.out; - j2.in = 1'd1 ? add6.out; - upd12[done] = j2.done ? 1'd1; - } - group upd13<"static"=1> { - i2.write_en = 1'd1; - add7.left = i2.out; - add7.right = const20.out; - i2.in = 1'd1 ? add7.out; - upd13[done] = i2.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd3[done] = i0.done ? 1'd1; - } - group upd4<"static"=1> { - x6_read0_0.write_en = 1'd1; - x60_0_addr1 = k0.out; - x60_0_addr0 = i1.out; - x6_read0_0.in = 1'd1 ? x60_0_read_data; - upd4[done] = x6_read0_0.done ? 1'd1; - } - group upd5<"static"=1> { - transpose_fc3_weight_read0_0.write_en = 1'd1; - transpose_fc3_weight0_0.addr1 = j1.out; - transpose_fc3_weight0_0.addr0 = k0.out; - transpose_fc3_weight_read0_0.in = 1'd1 ? transpose_fc3_weight0_0.read_data; - upd5[done] = transpose_fc3_weight_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - temporary_x70_0.addr1 = j1.out; - temporary_x70_0.addr0 = i1.out; - temporary_x70_0.write_en = 1'd1; - add2.left = temporary_x70_0.read_data; - add2.right = product_0.out; - temporary_x70_0.addr1 = j1.out; - temporary_x70_0.addr0 = i1.out; - temporary_x70_0.write_data = 1'd1 ? add2.out; - upd6[done] = temporary_x70_0.done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add3.left = k0.out; - add3.right = const12.out; - k0.in = 1'd1 ? add3.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - j1.write_en = 1'd1; - add4.left = j1.out; - add4.right = const13.out; - j1.in = 1'd1 ? add4.out; - upd8[done] = j1.done ? 1'd1; - } - group upd9<"static"=1> { - i1.write_en = 1'd1; - add5.left = i1.out; - add5.right = const14.out; - i1.in = 1'd1 ? add5.out; - upd9[done] = i1.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - upd3; - } - } - let2; - while le2.out with cond2 { - seq { - let3; - while le3.out with cond3 { - seq { - let4; - while le4.out with cond4 { - seq { - par { - upd4; - upd5; - } - let5; - let6; - upd6; - upd7; - } - } - upd8; - } - } - upd9; - } - } - let7; - while le5.out with cond5 { - seq { - let8; - while le6.out with cond6 { - seq { - upd10; - upd11; - upd12; - } - } - upd13; - } - } - } - } -} -component relu1(go: 1, clk: 1, x50_0_read_data: 32, x50_0_done: 1, x60_0_read_data: 32, x60_0_done: 1) -> (done: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1, x60_0_addr0: 1, x60_0_addr1: 7, x60_0_write_data: 32, x60_0_write_en: 1, x60_0_clk: 1) { - cells { - add0 = prim std_add(7); - add1 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(7, 0); - const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(1, 1); - fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); - gt0 = prim fixed_p_std_gt(32, 16, 16); - i0 = prim std_reg(1); - j0 = prim std_reg(7); - le0 = prim std_le(1); - le1 = prim std_le(7); - x5_read0_0 = prim std_reg(32); - x5_read1_0 = prim std_reg(32); - zero_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - gt0.left = x5_read0_0.out; - gt0.right = zero_0.out; - } - group let0<"static"=1> { - zero_0.in = fpconst0.out; - zero_0.write_en = 1'd1; - let0[done] = zero_0.done; - } - group let1<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let1[done] = i0.done; - } - group let2<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let2[done] = j0.done; - } - group upd0<"static"=1> { - x5_read0_0.write_en = 1'd1; - x50_0_addr1 = j0.out; - x50_0_addr0 = i0.out; - x5_read0_0.in = 1'd1 ? x50_0_read_data; - upd0[done] = x5_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - x5_read1_0.write_en = 1'd1; - x50_0_addr1 = j0.out; - x50_0_addr0 = i0.out; - x5_read1_0.in = 1'd1 ? x50_0_read_data; - upd1[done] = x5_read1_0.done ? 1'd1; - } - group upd2<"static"=1> { - x60_0_addr1 = j0.out; - x60_0_addr0 = i0.out; - x60_0_write_en = 1'd1; - x60_0_write_data = 1'd1 ? x5_read1_0.out; - upd2[done] = x60_0_done ? 1'd1; - } - group upd3<"static"=1> { - x60_0_addr1 = j0.out; - x60_0_addr0 = i0.out; - x60_0_write_en = 1'd1; - x60_0_write_data = 1'd1 ? zero_0.out; - upd3[done] = x60_0_done ? 1'd1; - } - group upd4<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd4[done] = j0.done ? 1'd1; - } - group upd5<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd5[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - let1; - while le0.out with cond0 { - seq { - let2; - while le1.out with cond1 { - seq { - upd0; - if gt0.out with cond2 { - seq { - upd1; - upd2; - } - } else { - upd3; - } - upd4; - } - } - upd5; - } - } - } - } -} -component bias_add1(go: 1, clk: 1, fc2_bias0_read_data: 32, fc2_bias0_done: 1, x40_0_read_data: 32, x40_0_done: 1, x50_0_read_data: 32, x50_0_done: 1) -> (done: 1, fc2_bias0_addr0: 7, fc2_bias0_write_data: 32, fc2_bias0_write_en: 1, fc2_bias0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1, x50_0_addr0: 1, x50_0_addr1: 7, x50_0_write_data: 32, x50_0_write_en: 1, x50_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(7); - add2 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(7, 0); - const3 = prim std_const(7, 63); - const4 = prim std_const(7, 1); - const5 = prim std_const(1, 1); - fc2_bias_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(7); - le0 = prim std_le(1); - le1 = prim std_le(7); - x4_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group upd0<"static"=1> { - x4_read0_0.write_en = 1'd1; - x40_0_addr1 = j0.out; - x40_0_addr0 = i0.out; - x4_read0_0.in = 1'd1 ? x40_0_read_data; - upd0[done] = x4_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - fc2_bias_read0_0.write_en = 1'd1; - fc2_bias0_addr0 = j0.out; - fc2_bias_read0_0.in = 1'd1 ? fc2_bias0_read_data; - upd1[done] = fc2_bias_read0_0.done ? 1'd1; - } - group upd2<"static"=1> { - x50_0_addr1 = j0.out; - x50_0_addr0 = i0.out; - x50_0_write_en = 1'd1; - add0.left = x4_read0_0.out; - add0.right = fc2_bias_read0_0.out; - x50_0_write_data = 1'd1 ? add0.out; - upd2[done] = x50_0_done ? 1'd1; - } - group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; - } - group upd4<"static"=1> { - i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - par { - upd0; - upd1; - } - upd2; - upd3; - } - } - upd4; - } - } - } - } -} -component dense1(go: 1, clk: 1, fc2_weight0_0_read_data: 32, fc2_weight0_0_done: 1, x30_0_read_data: 32, x30_0_done: 1, x40_0_read_data: 32, x40_0_done: 1) -> (done: 1, fc2_weight0_0_addr0: 7, fc2_weight0_0_addr1: 8, fc2_weight0_0_write_data: 32, fc2_weight0_0_write_en: 1, fc2_weight0_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1, x40_0_addr0: 1, x40_0_addr1: 7, x40_0_write_data: 32, x40_0_write_en: 1, x40_0_clk: 1) { - cells { - add0 = prim std_add(8); - add1 = prim std_add(7); - add2 = prim fixed_p_std_add(32, 16, 16); - add3 = prim std_add(8); - add4 = prim std_add(7); - add5 = prim std_add(1); - add6 = prim std_add(7); - add7 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(7, 0); - const1 = prim std_const(7, 63); - const10 = prim std_const(8, 0); - const11 = prim std_const(8, 127); - const12 = prim std_const(8, 1); - const13 = prim std_const(7, 1); - const14 = prim std_const(1, 1); - const15 = prim std_const(1, 0); - const16 = prim std_const(1, 0); - const17 = prim std_const(7, 0); - const18 = prim std_const(7, 63); - const19 = prim std_const(7, 1); - const2 = prim std_const(8, 0); - const20 = prim std_const(1, 1); - const3 = prim std_const(8, 127); - const4 = prim std_const(8, 1); - const5 = prim std_const(7, 1); - const6 = prim std_const(1, 0); - const7 = prim std_const(1, 0); - const8 = prim std_const(7, 0); - const9 = prim std_const(7, 63); - fc2_weight_read0_0 = prim std_reg(32); - i0 = prim std_reg(7); - i1 = prim std_reg(1); - i2 = prim std_reg(1); - j0 = prim std_reg(8); - j1 = prim std_reg(7); - j2 = prim std_reg(7); - k0 = prim std_reg(8); - le0 = prim std_le(7); - le1 = prim std_le(8); - le2 = prim std_le(1); - le3 = prim std_le(7); - le4 = prim std_le(8); - le5 = prim std_le(1); - le6 = prim std_le(7); - mult_pipe0 = prim std_mult_pipe(32); - product_0 = prim std_reg(32); - slice0 = prim std_slice(32, 32); - slice1 = prim std_slice(32, 32); - temporary_x40_0 = prim std_mem_d2(32, 1, 64, 1, 7); - temporary_x4_read0_0 = prim std_reg(32); - transpose_fc2_weight0_0 = prim std_mem_d2(32, 128, 64, 8, 7); - transpose_fc2_weight_read0_0 = prim std_reg(32); - x3_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = i1.out; - le2.right = const7.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = j1.out; - le3.right = const9.out; - } - group cond4<"static"=0> { - cond4[done] = 1'd1; - le4.left = k0.out; - le4.right = const11.out; - } - group cond5<"static"=0> { - cond5[done] = 1'd1; - le5.left = i2.out; - le5.right = const16.out; - } - group cond6<"static"=0> { - cond6[done] = 1'd1; - le6.left = j2.out; - le6.right = const18.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group let2<"static"=1> { - i1.in = const6.out; - i1.write_en = 1'd1; - let2[done] = i1.done; - } - group let3<"static"=1> { - j1.in = const8.out; - j1.write_en = 1'd1; - let3[done] = j1.done; - } - group let4<"static"=1> { - k0.in = const10.out; - k0.write_en = 1'd1; - let4[done] = k0.done; - } - group let5<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let5[done] = bin_read0_0.done; - slice0.in = mult_pipe0.out; - mult_pipe0.left = x3_read0_0.out; - mult_pipe0.right = transpose_fc2_weight_read0_0.out; - mult_pipe0.go = !mult_pipe0.done ? 1'd1; - } - group let6<"static"=1> { - product_0.in = slice1.out; - product_0.write_en = 1'd1; - let6[done] = product_0.done; - slice1.in = bin_read0_0.out; - } - group let7<"static"=1> { - i2.in = const15.out; - i2.write_en = 1'd1; - let7[done] = i2.done; - } - group let8<"static"=1> { - j2.in = const17.out; - j2.write_en = 1'd1; - let8[done] = j2.done; - } - group upd0<"static"=1> { - fc2_weight_read0_0.write_en = 1'd1; - fc2_weight0_0_addr1 = j0.out; - fc2_weight0_0_addr0 = i0.out; - fc2_weight_read0_0.in = 1'd1 ? fc2_weight0_0_read_data; - upd0[done] = fc2_weight_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - transpose_fc2_weight0_0.addr1 = i0.out; - transpose_fc2_weight0_0.addr0 = j0.out; - transpose_fc2_weight0_0.write_en = 1'd1; - transpose_fc2_weight0_0.write_data = 1'd1 ? fc2_weight_read0_0.out; - upd1[done] = transpose_fc2_weight0_0.done ? 1'd1; - } - group upd10<"static"=1> { - temporary_x4_read0_0.write_en = 1'd1; - temporary_x40_0.addr1 = j2.out; - temporary_x40_0.addr0 = i2.out; - temporary_x4_read0_0.in = 1'd1 ? temporary_x40_0.read_data; - upd10[done] = temporary_x4_read0_0.done ? 1'd1; - } - group upd11<"static"=1> { - x40_0_addr1 = j2.out; - x40_0_addr0 = i2.out; - x40_0_write_en = 1'd1; - x40_0_write_data = 1'd1 ? temporary_x4_read0_0.out; - upd11[done] = x40_0_done ? 1'd1; - } - group upd12<"static"=1> { - j2.write_en = 1'd1; - add6.left = j2.out; - add6.right = const19.out; - j2.in = 1'd1 ? add6.out; - upd12[done] = j2.done ? 1'd1; - } - group upd13<"static"=1> { - i2.write_en = 1'd1; - add7.left = i2.out; - add7.right = const20.out; - i2.in = 1'd1 ? add7.out; - upd13[done] = i2.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd3[done] = i0.done ? 1'd1; - } - group upd4<"static"=1> { - x3_read0_0.write_en = 1'd1; - x30_0_addr1 = k0.out; - x30_0_addr0 = i1.out; - x3_read0_0.in = 1'd1 ? x30_0_read_data; - upd4[done] = x3_read0_0.done ? 1'd1; - } - group upd5<"static"=1> { - transpose_fc2_weight_read0_0.write_en = 1'd1; - transpose_fc2_weight0_0.addr1 = j1.out; - transpose_fc2_weight0_0.addr0 = k0.out; - transpose_fc2_weight_read0_0.in = 1'd1 ? transpose_fc2_weight0_0.read_data; - upd5[done] = transpose_fc2_weight_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - temporary_x40_0.addr1 = j1.out; - temporary_x40_0.addr0 = i1.out; - temporary_x40_0.write_en = 1'd1; - add2.left = temporary_x40_0.read_data; - add2.right = product_0.out; - temporary_x40_0.addr1 = j1.out; - temporary_x40_0.addr0 = i1.out; - temporary_x40_0.write_data = 1'd1 ? add2.out; - upd6[done] = temporary_x40_0.done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add3.left = k0.out; - add3.right = const12.out; - k0.in = 1'd1 ? add3.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - j1.write_en = 1'd1; - add4.left = j1.out; - add4.right = const13.out; - j1.in = 1'd1 ? add4.out; - upd8[done] = j1.done ? 1'd1; - } - group upd9<"static"=1> { - i1.write_en = 1'd1; - add5.left = i1.out; - add5.right = const14.out; - i1.in = 1'd1 ? add5.out; - upd9[done] = i1.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - upd3; - } - } - let2; - while le2.out with cond2 { - seq { - let3; - while le3.out with cond3 { - seq { - let4; - while le4.out with cond4 { - seq { - par { - upd4; - upd5; - } - let5; - let6; - upd6; - upd7; - } - } - upd8; - } - } - upd9; - } - } - let7; - while le5.out with cond5 { - seq { - let8; - while le6.out with cond6 { - seq { - upd10; - upd11; - upd12; - } - } - upd13; - } - } - } - } -} -component relu(go: 1, clk: 1, x20_0_read_data: 32, x20_0_done: 1, x30_0_read_data: 32, x30_0_done: 1) -> (done: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1, x30_0_addr0: 1, x30_0_addr1: 8, x30_0_write_data: 32, x30_0_write_en: 1, x30_0_clk: 1) { - cells { - add0 = prim std_add(8); - add1 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(8, 0); - const3 = prim std_const(8, 127); - const4 = prim std_const(8, 1); - const5 = prim std_const(1, 1); - fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); - gt0 = prim fixed_p_std_gt(32, 16, 16); - i0 = prim std_reg(1); - j0 = prim std_reg(8); - le0 = prim std_le(1); - le1 = prim std_le(8); - x2_read0_0 = prim std_reg(32); - x2_read1_0 = prim std_reg(32); - zero_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - gt0.left = x2_read0_0.out; - gt0.right = zero_0.out; - } - group let0<"static"=1> { - zero_0.in = fpconst0.out; - zero_0.write_en = 1'd1; - let0[done] = zero_0.done; - } - group let1<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let1[done] = i0.done; - } - group let2<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let2[done] = j0.done; - } - group upd0<"static"=1> { - x2_read0_0.write_en = 1'd1; - x20_0_addr1 = j0.out; - x20_0_addr0 = i0.out; - x2_read0_0.in = 1'd1 ? x20_0_read_data; - upd0[done] = x2_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - x2_read1_0.write_en = 1'd1; - x20_0_addr1 = j0.out; - x20_0_addr0 = i0.out; - x2_read1_0.in = 1'd1 ? x20_0_read_data; - upd1[done] = x2_read1_0.done ? 1'd1; - } - group upd2<"static"=1> { - x30_0_addr1 = j0.out; - x30_0_addr0 = i0.out; - x30_0_write_en = 1'd1; - x30_0_write_data = 1'd1 ? x2_read1_0.out; - upd2[done] = x30_0_done ? 1'd1; - } - group upd3<"static"=1> { - x30_0_addr1 = j0.out; - x30_0_addr0 = i0.out; - x30_0_write_en = 1'd1; - x30_0_write_data = 1'd1 ? zero_0.out; - upd3[done] = x30_0_done ? 1'd1; - } - group upd4<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd4[done] = j0.done ? 1'd1; - } - group upd5<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd5[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - let1; - while le0.out with cond0 { - seq { - let2; - while le1.out with cond1 { - seq { - upd0; - if gt0.out with cond2 { - seq { - upd1; - upd2; - } - } else { - upd3; - } - upd4; - } - } - upd5; - } - } - } - } -} -component bias_add(go: 1, clk: 1, fc1_bias0_read_data: 32, fc1_bias0_done: 1, x10_0_read_data: 32, x10_0_done: 1, x20_0_read_data: 32, x20_0_done: 1) -> (done: 1, fc1_bias0_addr0: 8, fc1_bias0_write_data: 32, fc1_bias0_write_en: 1, fc1_bias0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, x20_0_addr0: 1, x20_0_addr1: 8, x20_0_write_data: 32, x20_0_write_en: 1, x20_0_clk: 1) { - cells { - add0 = prim fixed_p_std_add(32, 16, 16); - add1 = prim std_add(8); - add2 = prim std_add(1); - const0 = prim std_const(1, 0); - const1 = prim std_const(1, 0); - const2 = prim std_const(8, 0); - const3 = prim std_const(8, 127); - const4 = prim std_const(8, 1); - const5 = prim std_const(1, 1); - fc1_bias_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(8); - le0 = prim std_le(1); - le1 = prim std_le(8); - x1_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group upd0<"static"=1> { - x1_read0_0.write_en = 1'd1; - x10_0_addr1 = j0.out; - x10_0_addr0 = i0.out; - x1_read0_0.in = 1'd1 ? x10_0_read_data; - upd0[done] = x1_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - fc1_bias_read0_0.write_en = 1'd1; - fc1_bias0_addr0 = j0.out; - fc1_bias_read0_0.in = 1'd1 ? fc1_bias0_read_data; - upd1[done] = fc1_bias_read0_0.done ? 1'd1; - } - group upd2<"static"=1> { - x20_0_addr1 = j0.out; - x20_0_addr0 = i0.out; - x20_0_write_en = 1'd1; - add0.left = x1_read0_0.out; - add0.right = fc1_bias_read0_0.out; - x20_0_write_data = 1'd1 ? add0.out; - upd2[done] = x20_0_done ? 1'd1; - } - group upd3<"static"=1> { - j0.write_en = 1'd1; - add1.left = j0.out; - add1.right = const4.out; - j0.in = 1'd1 ? add1.out; - upd3[done] = j0.done ? 1'd1; - } - group upd4<"static"=1> { - i0.write_en = 1'd1; - add2.left = i0.out; - add2.right = const5.out; - i0.in = 1'd1 ? add2.out; - upd4[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - par { - upd0; - upd1; - } - upd2; - upd3; - } - } - upd4; - } - } - } - } -} -component dense(go: 1, clk: 1, fc1_weight0_0_read_data: 32, fc1_weight0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, fc1_weight0_0_addr0: 8, fc1_weight0_0_addr1: 10, fc1_weight0_0_write_data: 32, fc1_weight0_0_write_en: 1, fc1_weight0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 8, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { - cells { - add0 = prim std_add(10); - add1 = prim std_add(8); - add2 = prim fixed_p_std_add(32, 16, 16); - add3 = prim std_add(10); - add4 = prim std_add(8); - add5 = prim std_add(1); - add6 = prim std_add(8); - add7 = prim std_add(1); - bin_read0_0 = prim std_reg(32); - const0 = prim std_const(8, 0); - const1 = prim std_const(8, 127); - const10 = prim std_const(10, 0); - const11 = prim std_const(10, 783); - const12 = prim std_const(10, 1); - const13 = prim std_const(8, 1); - const14 = prim std_const(1, 1); - const15 = prim std_const(1, 0); - const16 = prim std_const(1, 0); - const17 = prim std_const(8, 0); - const18 = prim std_const(8, 127); - const19 = prim std_const(8, 1); - const2 = prim std_const(10, 0); - const20 = prim std_const(1, 1); - const3 = prim std_const(10, 783); - const4 = prim std_const(10, 1); - const5 = prim std_const(8, 1); - const6 = prim std_const(1, 0); - const7 = prim std_const(1, 0); - const8 = prim std_const(8, 0); - const9 = prim std_const(8, 127); - fc1_weight_read0_0 = prim std_reg(32); - i0 = prim std_reg(8); - i1 = prim std_reg(1); - i2 = prim std_reg(1); - j0 = prim std_reg(10); - j1 = prim std_reg(8); - j2 = prim std_reg(8); - k0 = prim std_reg(10); - le0 = prim std_le(8); - le1 = prim std_le(10); - le2 = prim std_le(1); - le3 = prim std_le(8); - le4 = prim std_le(10); - le5 = prim std_le(1); - le6 = prim std_le(8); - mult_pipe0 = prim std_mult_pipe(32); - product_0 = prim std_reg(32); - slice0 = prim std_slice(32, 32); - slice1 = prim std_slice(32, 32); - temporary_x10_0 = prim std_mem_d2(32, 1, 128, 1, 8); - temporary_x1_read0_0 = prim std_reg(32); - transpose_fc1_weight0_0 = prim std_mem_d2(32, 784, 128, 10, 8); - transpose_fc1_weight_read0_0 = prim std_reg(32); - x_read0_0 = prim std_reg(32); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const1.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const3.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = i1.out; - le2.right = const7.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = j1.out; - le3.right = const9.out; - } - group cond4<"static"=0> { - cond4[done] = 1'd1; - le4.left = k0.out; - le4.right = const11.out; - } - group cond5<"static"=0> { - cond5[done] = 1'd1; - le5.left = i2.out; - le5.right = const16.out; - } - group cond6<"static"=0> { - cond6[done] = 1'd1; - le6.left = j2.out; - le6.right = const18.out; - } - group let0<"static"=1> { - i0.in = const0.out; - i0.write_en = 1'd1; - let0[done] = i0.done; - } - group let1<"static"=1> { - j0.in = const2.out; - j0.write_en = 1'd1; - let1[done] = j0.done; - } - group let2<"static"=1> { - i1.in = const6.out; - i1.write_en = 1'd1; - let2[done] = i1.done; - } - group let3<"static"=1> { - j1.in = const8.out; - j1.write_en = 1'd1; - let3[done] = j1.done; - } - group let4<"static"=1> { - k0.in = const10.out; - k0.write_en = 1'd1; - let4[done] = k0.done; - } - group let5<"static"=1> { - bin_read0_0.in = slice0.out; - bin_read0_0.write_en = 1'd1; - let5[done] = bin_read0_0.done; - slice0.in = mult_pipe0.out; - mult_pipe0.left = x_read0_0.out; - mult_pipe0.right = transpose_fc1_weight_read0_0.out; - mult_pipe0.go = !mult_pipe0.done ? 1'd1; - } - group let6<"static"=1> { - product_0.in = slice1.out; - product_0.write_en = 1'd1; - let6[done] = product_0.done; - slice1.in = bin_read0_0.out; - } - group let7<"static"=1> { - i2.in = const15.out; - i2.write_en = 1'd1; - let7[done] = i2.done; - } - group let8<"static"=1> { - j2.in = const17.out; - j2.write_en = 1'd1; - let8[done] = j2.done; - } - group upd0<"static"=1> { - fc1_weight_read0_0.write_en = 1'd1; - fc1_weight0_0_addr1 = j0.out; - fc1_weight0_0_addr0 = i0.out; - fc1_weight_read0_0.in = 1'd1 ? fc1_weight0_0_read_data; - upd0[done] = fc1_weight_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - transpose_fc1_weight0_0.addr1 = i0.out; - transpose_fc1_weight0_0.addr0 = j0.out; - transpose_fc1_weight0_0.write_en = 1'd1; - transpose_fc1_weight0_0.write_data = 1'd1 ? fc1_weight_read0_0.out; - upd1[done] = transpose_fc1_weight0_0.done ? 1'd1; - } - group upd10<"static"=1> { - temporary_x1_read0_0.write_en = 1'd1; - temporary_x10_0.addr1 = j2.out; - temporary_x10_0.addr0 = i2.out; - temporary_x1_read0_0.in = 1'd1 ? temporary_x10_0.read_data; - upd10[done] = temporary_x1_read0_0.done ? 1'd1; - } - group upd11<"static"=1> { - x10_0_addr1 = j2.out; - x10_0_addr0 = i2.out; - x10_0_write_en = 1'd1; - x10_0_write_data = 1'd1 ? temporary_x1_read0_0.out; - upd11[done] = x10_0_done ? 1'd1; - } - group upd12<"static"=1> { - j2.write_en = 1'd1; - add6.left = j2.out; - add6.right = const19.out; - j2.in = 1'd1 ? add6.out; - upd12[done] = j2.done ? 1'd1; - } - group upd13<"static"=1> { - i2.write_en = 1'd1; - add7.left = i2.out; - add7.right = const20.out; - i2.in = 1'd1 ? add7.out; - upd13[done] = i2.done ? 1'd1; - } - group upd2<"static"=1> { - j0.write_en = 1'd1; - add0.left = j0.out; - add0.right = const4.out; - j0.in = 1'd1 ? add0.out; - upd2[done] = j0.done ? 1'd1; - } - group upd3<"static"=1> { - i0.write_en = 1'd1; - add1.left = i0.out; - add1.right = const5.out; - i0.in = 1'd1 ? add1.out; - upd3[done] = i0.done ? 1'd1; - } - group upd4<"static"=1> { - x_read0_0.write_en = 1'd1; - x0_0_addr1 = k0.out; - x0_0_addr0 = i1.out; - x_read0_0.in = 1'd1 ? x0_0_read_data; - upd4[done] = x_read0_0.done ? 1'd1; - } - group upd5<"static"=1> { - transpose_fc1_weight_read0_0.write_en = 1'd1; - transpose_fc1_weight0_0.addr1 = j1.out; - transpose_fc1_weight0_0.addr0 = k0.out; - transpose_fc1_weight_read0_0.in = 1'd1 ? transpose_fc1_weight0_0.read_data; - upd5[done] = transpose_fc1_weight_read0_0.done ? 1'd1; - } - group upd6<"static"=1> { - temporary_x10_0.addr1 = j1.out; - temporary_x10_0.addr0 = i1.out; - temporary_x10_0.write_en = 1'd1; - add2.left = temporary_x10_0.read_data; - add2.right = product_0.out; - temporary_x10_0.addr1 = j1.out; - temporary_x10_0.addr0 = i1.out; - temporary_x10_0.write_data = 1'd1 ? add2.out; - upd6[done] = temporary_x10_0.done ? 1'd1; - } - group upd7<"static"=1> { - k0.write_en = 1'd1; - add3.left = k0.out; - add3.right = const12.out; - k0.in = 1'd1 ? add3.out; - upd7[done] = k0.done ? 1'd1; - } - group upd8<"static"=1> { - j1.write_en = 1'd1; - add4.left = j1.out; - add4.right = const13.out; - j1.in = 1'd1 ? add4.out; - upd8[done] = j1.done ? 1'd1; - } - group upd9<"static"=1> { - i1.write_en = 1'd1; - add5.left = i1.out; - add5.right = const14.out; - i1.in = 1'd1 ? add5.out; - upd9[done] = i1.done ? 1'd1; - } - } - - control { - seq { - let0; - while le0.out with cond0 { - seq { - let1; - while le1.out with cond1 { - seq { - upd0; - upd1; - upd2; - } - } - upd3; - } - } - let2; - while le2.out with cond2 { - seq { - let3; - while le3.out with cond3 { - seq { - let4; - while le4.out with cond4 { - seq { - par { - upd4; - upd5; - } - let5; - let6; - upd6; - upd7; - } - } - upd8; - } - } - upd9; - } - } - let7; - while le5.out with cond5 { - seq { - let8; - while le6.out with cond6 { - seq { - upd10; - upd11; - upd12; - } - } - upd13; - } - } - } - } -} -component batch_flatten(go: 1, clk: 1, data0_0_0_0_read_data: 32, data0_0_0_0_done: 1, x0_0_read_data: 32, x0_0_done: 1) -> (done: 1, data0_0_0_0_addr0: 1, data0_0_0_0_addr1: 1, data0_0_0_0_addr2: 5, data0_0_0_0_addr3: 5, data0_0_0_0_write_data: 32, data0_0_0_0_write_en: 1, data0_0_0_0_clk: 1, x0_0_addr0: 1, x0_0_addr1: 10, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1) { - cells { - add0 = prim std_add(10); - add1 = prim std_add(5); - add2 = prim std_add(5); - add3 = prim std_add(1); - add4 = prim std_add(1); - const0 = prim std_const(10, 0); - const1 = prim std_const(1, 0); - const10 = prim std_const(5, 1); - const11 = prim std_const(5, 1); - const12 = prim std_const(1, 1); - const13 = prim std_const(1, 1); - const2 = prim std_const(1, 0); - const3 = prim std_const(1, 0); - const4 = prim std_const(1, 0); - const5 = prim std_const(5, 0); - const6 = prim std_const(5, 27); - const7 = prim std_const(5, 0); - const8 = prim std_const(5, 27); - const9 = prim std_const(10, 1); - data_read0_0 = prim std_reg(32); - i0 = prim std_reg(1); - j0 = prim std_reg(1); - k0 = prim std_reg(5); - l0 = prim std_reg(5); - le0 = prim std_le(1); - le1 = prim std_le(1); - le2 = prim std_le(5); - le3 = prim std_le(5); - m_0 = prim std_reg(10); - } - wires { - group cond0<"static"=0> { - cond0[done] = 1'd1; - le0.left = i0.out; - le0.right = const2.out; - } - group cond1<"static"=0> { - cond1[done] = 1'd1; - le1.left = j0.out; - le1.right = const4.out; - } - group cond2<"static"=0> { - cond2[done] = 1'd1; - le2.left = k0.out; - le2.right = const6.out; - } - group cond3<"static"=0> { - cond3[done] = 1'd1; - le3.left = l0.out; - le3.right = const8.out; - } - group let0<"static"=1> { - m_0.in = const0.out; - m_0.write_en = 1'd1; - let0[done] = m_0.done; - } - group let1<"static"=1> { - i0.in = const1.out; - i0.write_en = 1'd1; - let1[done] = i0.done; - } - group let2<"static"=1> { - j0.in = const3.out; - j0.write_en = 1'd1; - let2[done] = j0.done; - } - group let3<"static"=1> { - k0.in = const5.out; - k0.write_en = 1'd1; - let3[done] = k0.done; - } - group let4<"static"=1> { - l0.in = const7.out; - l0.write_en = 1'd1; - let4[done] = l0.done; - } - group upd0<"static"=1> { - data_read0_0.write_en = 1'd1; - data0_0_0_0_addr3 = l0.out; - data0_0_0_0_addr2 = k0.out; - data0_0_0_0_addr1 = j0.out; - data0_0_0_0_addr0 = i0.out; - data_read0_0.in = 1'd1 ? data0_0_0_0_read_data; - upd0[done] = data_read0_0.done ? 1'd1; - } - group upd1<"static"=1> { - x0_0_addr1 = m_0.out; - x0_0_addr0 = i0.out; - x0_0_write_en = 1'd1; - x0_0_write_data = 1'd1 ? data_read0_0.out; - upd1[done] = x0_0_done ? 1'd1; - } - group upd2<"static"=1> { - m_0.write_en = 1'd1; - add0.left = m_0.out; - add0.right = const9.out; - m_0.in = 1'd1 ? add0.out; - upd2[done] = m_0.done ? 1'd1; - } - group upd3<"static"=1> { - l0.write_en = 1'd1; - add1.left = l0.out; - add1.right = const10.out; - l0.in = 1'd1 ? add1.out; - upd3[done] = l0.done ? 1'd1; - } - group upd4<"static"=1> { - k0.write_en = 1'd1; - add2.left = k0.out; - add2.right = const11.out; - k0.in = 1'd1 ? add2.out; - upd4[done] = k0.done ? 1'd1; - } - group upd5<"static"=1> { - j0.write_en = 1'd1; - add3.left = j0.out; - add3.right = const12.out; - j0.in = 1'd1 ? add3.out; - upd5[done] = j0.done ? 1'd1; - } - group upd6<"static"=1> { - i0.write_en = 1'd1; - add4.left = i0.out; - add4.right = const13.out; - i0.in = 1'd1 ? add4.out; - upd6[done] = i0.done ? 1'd1; - } - } - - control { - seq { - let0; - let1; - while le0.out with cond0 { - seq { - let2; - while le1.out with cond1 { - seq { - let3; - while le2.out with cond2 { - seq { - let4; - while le3.out with cond3 { - seq { - upd0; - upd1; - upd2; - upd3; - } - } - upd4; - } - } - upd5; - } - } - upd6; - } - } - } - } -} - -component main () -> () { - cells { - x8 = prim std_mem_d2(32, 1, 10, 1, 4); - x7 = prim std_mem_d2(32, 1, 10, 1, 4); - fc3_bias = prim std_mem_d1(32, 10, 4); - bias_add2 = bias_add2; - x6 = prim std_mem_d2(32, 1, 64, 1, 7); - fc3_weight = prim std_mem_d2(32, 10, 64, 4, 7); - dense2 = dense2; - x5 = prim std_mem_d2(32, 1, 64, 1, 7); - relu1 = relu1; - x4 = prim std_mem_d2(32, 1, 64, 1, 7); - fc2_bias = prim std_mem_d1(32, 64, 7); - bias_add1 = bias_add1; - x3 = prim std_mem_d2(32, 1, 128, 1, 8); - fc2_weight = prim std_mem_d2(32, 64, 128, 7, 8); - dense1 = dense1; - x2 = prim std_mem_d2(32, 1, 128, 1, 8); - relu0 = relu; - x1 = prim std_mem_d2(32, 1, 128, 1, 8); - fc1_bias = prim std_mem_d1(32, 128, 8); - bias_add0 = bias_add; - x = prim std_mem_d2(32, 1, 784, 1, 10); - fc1_weight = prim std_mem_d2(32, 128, 784, 8, 10); - dense0 = dense; - data = prim std_mem_d4(32, 1, 1, 28, 28, 1, 1, 5, 5); - batch_flatten0 = batch_flatten; - } - wires { - group run_batch_flatten { - data.addr0 = batch_flatten0.data0_0_0_0_addr0; - batch_flatten0.data0_0_0_0_read_data = data.read_data; - data.addr1 = batch_flatten0.data0_0_0_0_addr1; - data.addr2 = batch_flatten0.data0_0_0_0_addr2; - x.addr0 = batch_flatten0.x0_0_addr0; - x.addr1 = batch_flatten0.x0_0_addr1; - x.write_data = batch_flatten0.x0_0_write_data; - x.write_en = batch_flatten0.x0_0_write_en; - batch_flatten0.x0_0_done = x.done; - batch_flatten0.go = 1'd1; - run_batch_flatten[done] = batch_flatten0.done ? 1'd1; - } - group run_dense { - x.addr0 = dense0.x0_0_addr0; - dense0.x0_0_read_data = x.read_data; - x.addr1 = dense0.x0_0_addr1; - fc1_weight.addr0 = dense0.fc1_weight0_0_addr0; - dense0.fc1_weight0_0_read_data = fc1_weight.read_data; - fc1_weight.addr1 = dense0.fc1_weight0_0_addr1; - x1.addr0 = dense0.x10_0_addr0; - x1.addr1 = dense0.x10_0_addr1; - x1.write_data = dense0.x10_0_write_data; - x1.write_en = dense0.x10_0_write_en; - dense0.x10_0_done = x1.done; - dense0.go = 1'd1; - run_dense[done] = dense0.done ? 1'd1; - } - group run_bias_add { - x1.addr0 = bias_add0.x10_0_addr0; - bias_add0.x10_0_read_data = x1.read_data; - x1.addr1 = bias_add0.x10_0_addr1; - fc1_bias.addr0 = bias_add0.fc1_bias0_addr0; - bias_add0.fc1_bias0_read_data = fc1_bias.read_data; - x2.addr0 = bias_add0.x20_0_addr0; - x2.addr1 = bias_add0.x20_0_addr1; - x2.write_data = bias_add0.x20_0_write_data; - x2.write_en = bias_add0.x20_0_write_en; - bias_add0.x20_0_done = x2.done; - bias_add0.go = 1'd1; - run_bias_add[done] = bias_add0.done ? 1'd1; - } - group run_relu { - x2.addr0 = relu0.x20_0_addr0; - relu0.x20_0_read_data = x2.read_data; - x2.addr1 = relu0.x20_0_addr1; - x3.addr0 = relu0.x30_0_addr0; - x3.addr1 = relu0.x30_0_addr1; - x3.write_data = relu0.x30_0_write_data; - x3.write_en = relu0.x30_0_write_en; - relu0.x30_0_done = x3.done; - relu0.go = 1'd1; - run_relu[done] = relu0.done ? 1'd1; - } - group run_dense1 { - x3.addr0 = dense1.x30_0_addr0; - dense1.x30_0_read_data = x3.read_data; - x3.addr1 = dense1.x30_0_addr1; - fc2_weight.addr0 = dense1.fc2_weight0_0_addr0; - dense1.fc2_weight0_0_read_data = fc2_weight.read_data; - fc2_weight.addr1 = dense1.fc2_weight0_0_addr1; - x4.addr0 = dense1.x40_0_addr0; - x4.addr1 = dense1.x40_0_addr1; - x4.write_data = dense1.x40_0_write_data; - x4.write_en = dense1.x40_0_write_en; - dense1.x40_0_done = x4.done; - dense1.go = 1'd1; - run_dense1[done] = dense1.done ? 1'd1; - } - group run_bias_add1 { - x4.addr0 = bias_add1.x40_0_addr0; - bias_add1.x40_0_read_data = x4.read_data; - x4.addr1 = bias_add1.x40_0_addr1; - fc2_bias.addr0 = bias_add1.fc2_bias0_addr0; - bias_add1.fc2_bias0_read_data = fc2_bias.read_data; - x5.addr0 = bias_add1.x50_0_addr0; - x5.addr1 = bias_add1.x50_0_addr1; - x5.write_data = bias_add1.x50_0_write_data; - x5.write_en = bias_add1.x50_0_write_en; - bias_add1.x50_0_done = x5.done; - bias_add1.go = 1'd1; - run_bias_add1[done] = bias_add1.done ? 1'd1; - } - group run_relu1 { - x5.addr0 = relu1.x50_0_addr0; - relu1.x50_0_read_data = x5.read_data; - x5.addr1 = relu1.x50_0_addr1; - x6.addr0 = relu1.x60_0_addr0; - x6.addr1 = relu1.x60_0_addr1; - x6.write_data = relu1.x60_0_write_data; - x6.write_en = relu1.x60_0_write_en; - relu1.x60_0_done = x6.done; - relu1.go = 1'd1; - run_relu1[done] = relu1.done ? 1'd1; - } - group run_dense2 { - x6.addr0 = dense2.x60_0_addr0; - dense2.x60_0_read_data = x6.read_data; - x6.addr1 = dense2.x60_0_addr1; - fc3_weight.addr0 = dense2.fc3_weight0_0_addr0; - dense2.fc3_weight0_0_read_data = fc3_weight.read_data; - fc3_weight.addr1 = dense2.fc3_weight0_0_addr1; - x7.addr0 = dense2.x70_0_addr0; - x7.addr1 = dense2.x70_0_addr1; - x7.write_data = dense2.x70_0_write_data; - x7.write_en = dense2.x70_0_write_en; - dense2.x70_0_done = x7.done; - dense2.go = 1'd1; - run_dense2[done] = dense2.done ? 1'd1; - } - group run_bias_add2 { - x7.addr0 = bias_add2.x70_0_addr0; - bias_add2.x70_0_read_data = x7.read_data; - x7.addr1 = bias_add2.x70_0_addr1; - fc3_bias.addr0 = bias_add2.fc3_bias0_addr0; - bias_add2.fc3_bias0_read_data = fc3_bias.read_data; - x8.addr0 = bias_add2.x80_0_addr0; - x8.addr1 = bias_add2.x80_0_addr1; - x8.write_data = bias_add2.x80_0_write_data; - x8.write_en = bias_add2.x80_0_write_en; - bias_add2.x80_0_done = x8.done; - bias_add2.go = 1'd1; - run_bias_add2[done] = bias_add2.done ? 1'd1; - } - } - control { - seq { - run_batch_flatten; - run_dense; - run_bias_add; - run_relu; - run_dense1; - run_bias_add1; - run_relu1; - run_dense2; - run_bias_add2; - } - } -} diff --git a/frontends/relay-futil/tests/mlp_net.relay b/frontends/relay-futil/tests/mlp_net.relay deleted file mode 100644 index 8943360100..0000000000 --- a/frontends/relay-futil/tests/mlp_net.relay +++ /dev/null @@ -1,17 +0,0 @@ -v0.0.4 -fn (%data: Tensor[(1, 1, 28, 28), float32], %fc1_weight: Tensor[(128, 784), float32], %fc1_bias: Tensor[(128), float32], - %fc2_weight: Tensor[(64, 128), float32], %fc2_bias: Tensor[(64), float32], %fc3_weight: Tensor[(10, 64), float32], - %fc3_bias: Tensor[(10), float32]) -> Tensor[(1, 10), float32] { - let %x: Tensor[(1, 784), float32] = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), float32] */; - let %x1: Tensor[(1, 128), float32] = nn.dense(%x, %fc1_weight, units=128) /* ty=Tensor[(1, 128), float32] */; - let %x2: Tensor[(1, 128), float32] = nn.bias_add(%x1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), float32] */; - let %x3: Tensor[(1, 128), float32] = nn.relu(%x2) /* ty=Tensor[(1, 128), float32] */; - let %x4: Tensor[(1, 64), float32] = nn.dense(%x3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), float32] */; - let %x5: Tensor[(1, 64), float32] = nn.bias_add(%x4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), float32] */; - let %x6: Tensor[(1, 64), float32] = nn.relu(%x5) /* ty=Tensor[(1, 64), float32] */; - let %x7: Tensor[(1, 10), float32] = nn.dense(%x6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), float32] */; - let %x8: Tensor[(1, 10), float32] = nn.bias_add(%x7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), float32] */; - %x8 - // let %x9: Tensor[(1, 10), float32] = nn.softmax(%x8) /* ty=Tensor[(1, 10), float32] */; - // %x9 -} diff --git a/frontends/relay-futil/tests/relu.expect b/frontends/relay-futil/tests/relu.expect index 74b5646d9b..66f3dd53e8 100644 --- a/frontends/relay-futil/tests/relu.expect +++ b/frontends/relay-futil/tests/relu.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component relu(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 2, x0_0_0_0_addr1: 3, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 6, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 2, x10_0_0_0_addr1: 3, x10_0_0_0_addr2: 4, x10_0_0_0_addr3: 6, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { +component relu0(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 2, x0_0_0_0_addr1: 3, x0_0_0_0_addr2: 4, x0_0_0_0_addr3: 6, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 2, x10_0_0_0_addr1: 3, x10_0_0_0_addr2: 4, x10_0_0_0_addr3: 6, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim std_add(6); add1 = prim std_add(4); @@ -193,25 +193,29 @@ component main () -> () { cells { x1 = prim std_mem_d4(32, 2, 4, 8, 32, 2, 3, 4, 6); x = prim std_mem_d4(32, 2, 4, 8, 32, 2, 3, 4, 6); - relu0 = relu; + comp_relu0 = relu0; } wires { - group run_relu { - x.addr0 = relu0.x0_0_0_0_addr0; - relu0.x0_0_0_0_read_data = x.read_data; - x.addr1 = relu0.x0_0_0_0_addr1; - x.addr2 = relu0.x0_0_0_0_addr2; - x1.addr0 = relu0.x10_0_0_0_addr0; - x1.write_data = relu0.x10_0_0_0_write_data; - x1.write_en = relu0.x10_0_0_0_write_en; - relu0.x10_0_0_0_done = x1.done; - relu0.go = 1'd1; - run_relu[done] = relu0.done ? 1'd1; + group run_relu0 { + x.addr0 = comp_relu0.x0_0_0_0_addr0; + comp_relu0.x0_0_0_0_read_data = x.read_data; + x.addr1 = comp_relu0.x0_0_0_0_addr1; + x.addr2 = comp_relu0.x0_0_0_0_addr2; + x.addr3 = comp_relu0.x0_0_0_0_addr3; + x1.addr0 = comp_relu0.x10_0_0_0_addr0; + x1.addr1 = comp_relu0.x10_0_0_0_addr1; + x1.addr2 = comp_relu0.x10_0_0_0_addr2; + x1.addr3 = comp_relu0.x10_0_0_0_addr3; + x1.write_data = comp_relu0.x10_0_0_0_write_data; + x1.write_en = comp_relu0.x10_0_0_0_write_en; + comp_relu0.x10_0_0_0_done = x1.done; + comp_relu0.go = 1'd1; + run_relu0[done] = comp_relu0.done ? 1'd1; } } control { seq { - run_relu; + run_relu0; } } } diff --git a/frontends/relay-futil/tests/softmax.expect b/frontends/relay-futil/tests/softmax.expect new file mode 100644 index 0000000000..128b2ebc0d --- /dev/null +++ b/frontends/relay-futil/tests/softmax.expect @@ -0,0 +1,210 @@ +import "primitives/std.lib"; + +component softmax0(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1) -> (done: 1, x0_0_addr0: 1, x0_0_addr1: 4, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 1, x10_0_addr1: 4, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1) { + cells { + add0 = prim fixed_p_std_add(32, 16, 16); + add1 = prim std_add(4); + add2 = prim std_add(4); + add3 = prim std_add(1); + bin_read0_0 = prim std_reg(32); + const0 = prim std_const(1, 0); + const1 = prim std_const(1, 0); + const2 = prim std_const(4, 0); + const3 = prim std_const(4, 9); + const4 = prim std_const(4, 1); + const5 = prim std_const(4, 0); + const6 = prim std_const(4, 9); + const7 = prim std_const(4, 1); + const8 = prim std_const(1, 1); + div_pipe0 = prim std_div_pipe(32); + exp0 = prim std_exp(); + exp1 = prim std_exp(); + fpconst0 = prim fixed_p_std_const(32, 16, 16, 0, 0); + i0 = prim std_reg(1); + j0 = prim std_reg(4); + k0 = prim std_reg(4); + le0 = prim std_le(1); + le1 = prim std_le(4); + le2 = prim std_le(4); + slice0 = prim std_slice(32, 32); + x1_read0_0 = prim std_reg(32); + x_expsum_0 = prim std_reg(32); + x_read0_0 = prim std_reg(32); + x_read1_0 = prim std_reg(32); + } + wires { + group cond0<"static"=0> { + cond0[done] = 1'd1; + le0.left = i0.out; + le0.right = const1.out; + } + group cond1<"static"=0> { + cond1[done] = 1'd1; + le1.left = j0.out; + le1.right = const3.out; + } + group cond2<"static"=0> { + cond2[done] = 1'd1; + le2.left = k0.out; + le2.right = const6.out; + } + group let0<"static"=1> { + i0.in = const0.out; + i0.write_en = 1'd1; + let0[done] = i0.done; + } + group let1<"static"=1> { + x_expsum_0.in = fpconst0.out; + x_expsum_0.write_en = 1'd1; + let1[done] = x_expsum_0.done; + } + group let2<"static"=1> { + j0.in = const2.out; + j0.write_en = 1'd1; + let2[done] = j0.done; + } + group let3<"static"=1> { + k0.in = const5.out; + k0.write_en = 1'd1; + let3[done] = k0.done; + } + group let4<"static"=1> { + bin_read0_0.in = slice0.out; + bin_read0_0.write_en = 1'd1; + let4[done] = bin_read0_0.done; + slice0.in = div_pipe0.out; + div_pipe0.left = x1_read0_0.out; + div_pipe0.right = x_expsum_0.out; + div_pipe0.go = !div_pipe0.done ? 1'd1; + } + group upd0<"static"=1> { + x_read0_0.write_en = 1'd1; + x0_0_addr1 = j0.out; + x0_0_addr0 = i0.out; + x_read0_0.in = 1'd1 ? x0_0_read_data; + upd0[done] = x_read0_0.done ? 1'd1; + } + group upd1 { + x_expsum_0.write_en = 1'd1; + add0.left = x_expsum_0.out; + add0.right = exp0.out; + exp0.exponent = x_read0_0.out; + exp0.go = !exp0.done ? 1'd1; + x_expsum_0.in = 1'd1 ? add0.out; + upd1[done] = x_expsum_0.done ? 1'd1; + } + group upd2<"static"=1> { + j0.write_en = 1'd1; + add1.left = j0.out; + add1.right = const4.out; + j0.in = 1'd1 ? add1.out; + upd2[done] = j0.done ? 1'd1; + } + group upd3<"static"=1> { + x_read1_0.write_en = 1'd1; + x0_0_addr1 = k0.out; + x0_0_addr0 = i0.out; + x_read1_0.in = 1'd1 ? x0_0_read_data; + upd3[done] = x_read1_0.done ? 1'd1; + } + group upd4 { + x10_0_addr1 = k0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = exp1.done; + exp1.exponent = x_read1_0.out; + exp1.go = !exp1.done ? 1'd1; + x10_0_write_data = exp1.done ? exp1.out; + upd4[done] = x10_0_done ? 1'd1; + } + group upd5<"static"=1> { + x1_read0_0.write_en = 1'd1; + x10_0_addr1 = k0.out; + x10_0_addr0 = i0.out; + x1_read0_0.in = 1'd1 ? x10_0_read_data; + upd5[done] = x1_read0_0.done ? 1'd1; + } + group upd6<"static"=1> { + x10_0_addr1 = k0.out; + x10_0_addr0 = i0.out; + x10_0_write_en = 1'd1; + x10_0_write_data = 1'd1 ? bin_read0_0.out; + upd6[done] = x10_0_done ? 1'd1; + } + group upd7<"static"=1> { + k0.write_en = 1'd1; + add2.left = k0.out; + add2.right = const7.out; + k0.in = 1'd1 ? add2.out; + upd7[done] = k0.done ? 1'd1; + } + group upd8<"static"=1> { + i0.write_en = 1'd1; + add3.left = i0.out; + add3.right = const8.out; + i0.in = 1'd1 ? add3.out; + upd8[done] = i0.done ? 1'd1; + } + } + + control { + seq { + let0; + while le0.out with cond0 { + seq { + par { + let1; + seq { + let2; + while le1.out with cond1 { + seq { + upd0; + upd1; + upd2; + } + } + } + } + let3; + while le2.out with cond2 { + seq { + upd3; + upd4; + upd5; + let4; + upd6; + upd7; + } + } + upd8; + } + } + } + } +} + +component main () -> () { + cells { + x1 = prim std_mem_d2(32, 1, 10, 1, 4); + x = prim std_mem_d2(32, 1, 10, 1, 4); + comp_softmax0 = softmax0; + } + wires { + group run_softmax0 { + x.addr0 = comp_softmax0.x0_0_addr0; + comp_softmax0.x0_0_read_data = x.read_data; + x.addr1 = comp_softmax0.x0_0_addr1; + x1.addr0 = comp_softmax0.x10_0_addr0; + x1.addr1 = comp_softmax0.x10_0_addr1; + x1.write_data = comp_softmax0.x10_0_write_data; + x1.write_en = comp_softmax0.x10_0_write_en; + comp_softmax0.x10_0_done = x1.done; + comp_softmax0.go = 1'd1; + run_softmax0[done] = comp_softmax0.done ? 1'd1; + } + } + control { + seq { + run_softmax0; + } + } +} diff --git a/frontends/relay-futil/tests/softmax.relay b/frontends/relay-futil/tests/softmax.relay new file mode 100644 index 0000000000..df46a20d70 --- /dev/null +++ b/frontends/relay-futil/tests/softmax.relay @@ -0,0 +1,6 @@ +v0.0.4 +fn (%x: Tensor[(1, 10), float32]) { + let %x1: Tensor[(1, 10), float32] = nn.softmax(%x); + %x1 +} + diff --git a/frontends/relay-futil/tests/sqrt.expect b/frontends/relay-futil/tests/sqrt.expect index edb40c6259..ab67351192 100644 --- a/frontends/relay-futil/tests/sqrt.expect +++ b/frontends/relay-futil/tests/sqrt.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component sqrt(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 4, x0_0_0_0_addr1: 4, x0_0_0_0_addr2: 5, x0_0_0_0_addr3: 7, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 4, x10_0_0_0_addr1: 4, x10_0_0_0_addr2: 5, x10_0_0_0_addr3: 7, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { +component sqrt0(go: 1, clk: 1, x0_0_0_0_read_data: 32, x0_0_0_0_done: 1, x10_0_0_0_read_data: 32, x10_0_0_0_done: 1) -> (done: 1, x0_0_0_0_addr0: 4, x0_0_0_0_addr1: 4, x0_0_0_0_addr2: 5, x0_0_0_0_addr3: 7, x0_0_0_0_write_data: 32, x0_0_0_0_write_en: 1, x0_0_0_0_clk: 1, x10_0_0_0_addr0: 4, x10_0_0_0_addr1: 4, x10_0_0_0_addr2: 5, x10_0_0_0_addr3: 7, x10_0_0_0_write_data: 32, x10_0_0_0_write_en: 1, x10_0_0_0_clk: 1) { cells { add0 = prim std_add(7); add1 = prim std_add(5); @@ -156,25 +156,29 @@ component main () -> () { cells { x1 = prim std_mem_d4(32, 8, 8, 16, 64, 4, 4, 5, 7); x = prim std_mem_d4(32, 8, 8, 16, 64, 4, 4, 5, 7); - sqrt0 = sqrt; + comp_sqrt0 = sqrt0; } wires { - group run_sqrt { - x.addr0 = sqrt0.x0_0_0_0_addr0; - sqrt0.x0_0_0_0_read_data = x.read_data; - x.addr1 = sqrt0.x0_0_0_0_addr1; - x.addr2 = sqrt0.x0_0_0_0_addr2; - x1.addr0 = sqrt0.x10_0_0_0_addr0; - x1.write_data = sqrt0.x10_0_0_0_write_data; - x1.write_en = sqrt0.x10_0_0_0_write_en; - sqrt0.x10_0_0_0_done = x1.done; - sqrt0.go = 1'd1; - run_sqrt[done] = sqrt0.done ? 1'd1; + group run_sqrt0 { + x.addr0 = comp_sqrt0.x0_0_0_0_addr0; + comp_sqrt0.x0_0_0_0_read_data = x.read_data; + x.addr1 = comp_sqrt0.x0_0_0_0_addr1; + x.addr2 = comp_sqrt0.x0_0_0_0_addr2; + x.addr3 = comp_sqrt0.x0_0_0_0_addr3; + x1.addr0 = comp_sqrt0.x10_0_0_0_addr0; + x1.addr1 = comp_sqrt0.x10_0_0_0_addr1; + x1.addr2 = comp_sqrt0.x10_0_0_0_addr2; + x1.addr3 = comp_sqrt0.x10_0_0_0_addr3; + x1.write_data = comp_sqrt0.x10_0_0_0_write_data; + x1.write_en = comp_sqrt0.x10_0_0_0_write_en; + comp_sqrt0.x10_0_0_0_done = x1.done; + comp_sqrt0.go = 1'd1; + run_sqrt0[done] = comp_sqrt0.done ? 1'd1; } } control { seq { - run_sqrt; + run_sqrt0; } } } diff --git a/frontends/relay-futil/tests/sub.expect b/frontends/relay-futil/tests/sub.expect index c74af4fb2e..7b3f88385d 100644 --- a/frontends/relay-futil/tests/sub.expect +++ b/frontends/relay-futil/tests/sub.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component subtract(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { +component subtract0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, y0_read_data: 32, y0_done: 1, z0_read_data: 32, z0_done: 1) -> (done: 1, x0_addr0: 1, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, y0_addr0: 1, y0_write_data: 32, y0_write_en: 1, y0_clk: 1, z0_addr0: 1, z0_write_data: 32, z0_write_en: 1, z0_clk: 1) { cells { add0 = prim std_add(1); const0 = prim std_const(1, 0); @@ -74,25 +74,25 @@ component main () -> () { z = prim std_mem_d1(32, 1, 1); x = prim std_mem_d1(32, 1, 1); y = prim std_mem_d1(32, 1, 1); - subtract0 = subtract; + comp_subtract0 = subtract0; } wires { - group run_subtract { - x.addr0 = subtract0.x0_addr0; - subtract0.x0_read_data = x.read_data; - y.addr0 = subtract0.y0_addr0; - subtract0.y0_read_data = y.read_data; - z.addr0 = subtract0.z0_addr0; - z.write_data = subtract0.z0_write_data; - z.write_en = subtract0.z0_write_en; - subtract0.z0_done = z.done; - subtract0.go = 1'd1; - run_subtract[done] = subtract0.done ? 1'd1; + group run_subtract0 { + x.addr0 = comp_subtract0.x0_addr0; + comp_subtract0.x0_read_data = x.read_data; + y.addr0 = comp_subtract0.y0_addr0; + comp_subtract0.y0_read_data = y.read_data; + z.addr0 = comp_subtract0.z0_addr0; + z.write_data = comp_subtract0.z0_write_data; + z.write_en = comp_subtract0.z0_write_en; + comp_subtract0.z0_done = z.done; + comp_subtract0.go = 1'd1; + run_subtract0[done] = comp_subtract0.done ? 1'd1; } } control { seq { - run_subtract; + run_subtract0; } } } diff --git a/frontends/relay-futil/tests/tensor1d_mult.expect b/frontends/relay-futil/tests/tensor1d_mult.expect index dac0e76d85..a2b7d5d802 100644 --- a/frontends/relay-futil/tests/tensor1d_mult.expect +++ b/frontends/relay-futil/tests/tensor1d_mult.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component multiply(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { +component multiply0(go: 1, clk: 1, x0_read_data: 32, x0_done: 1, x10_read_data: 32, x10_done: 1, y0_read_data: 32, y0_done: 1) -> (done: 1, x0_addr0: 3, x0_write_data: 32, x0_write_en: 1, x0_clk: 1, x10_addr0: 3, x10_write_data: 32, x10_write_en: 1, x10_clk: 1, y0_addr0: 3, y0_write_data: 32, y0_write_en: 1, y0_clk: 1) { cells { add0 = prim std_add(3); bin_read0_0 = prim std_reg(32); @@ -82,25 +82,25 @@ component main () -> () { x1 = prim std_mem_d1(32, 4, 3); x = prim std_mem_d1(32, 4, 3); y = prim std_mem_d1(32, 4, 3); - multiply0 = multiply; + comp_multiply0 = multiply0; } wires { - group run_multiply { - x.addr0 = multiply0.x0_addr0; - multiply0.x0_read_data = x.read_data; - y.addr0 = multiply0.y0_addr0; - multiply0.y0_read_data = y.read_data; - x1.addr0 = multiply0.x10_addr0; - x1.write_data = multiply0.x10_write_data; - x1.write_en = multiply0.x10_write_en; - multiply0.x10_done = x1.done; - multiply0.go = 1'd1; - run_multiply[done] = multiply0.done ? 1'd1; + group run_multiply0 { + x.addr0 = comp_multiply0.x0_addr0; + comp_multiply0.x0_read_data = x.read_data; + y.addr0 = comp_multiply0.y0_addr0; + comp_multiply0.y0_read_data = y.read_data; + x1.addr0 = comp_multiply0.x10_addr0; + x1.write_data = comp_multiply0.x10_write_data; + x1.write_en = comp_multiply0.x10_write_en; + comp_multiply0.x10_done = x1.done; + comp_multiply0.go = 1'd1; + run_multiply0[done] = comp_multiply0.done ? 1'd1; } } control { seq { - run_multiply; + run_multiply0; } } } diff --git a/frontends/relay-futil/tests/tensor2d_add.expect b/frontends/relay-futil/tests/tensor2d_add.expect index d289badb27..c6a409b5af 100644 --- a/frontends/relay-futil/tests/tensor2d_add.expect +++ b/frontends/relay-futil/tests/tensor2d_add.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component add(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { +component add0(go: 1, clk: 1, x0_0_read_data: 32, x0_0_done: 1, x10_0_read_data: 32, x10_0_done: 1, y0_0_read_data: 32, y0_0_done: 1) -> (done: 1, x0_0_addr0: 2, x0_0_addr1: 3, x0_0_write_data: 32, x0_0_write_en: 1, x0_0_clk: 1, x10_0_addr0: 2, x10_0_addr1: 3, x10_0_write_data: 32, x10_0_write_en: 1, x10_0_clk: 1, y0_0_addr0: 2, y0_0_addr1: 3, y0_0_write_data: 32, y0_0_write_en: 1, y0_0_clk: 1) { cells { add0 = prim std_add(32); add1 = prim std_add(3); @@ -106,28 +106,28 @@ component main () -> () { x1 = prim std_mem_d2(32, 2, 4, 2, 3); x = prim std_mem_d2(32, 2, 4, 2, 3); y = prim std_mem_d2(32, 2, 4, 2, 3); - add0 = add; + comp_add0 = add0; } wires { - group run_add { - x.addr0 = add0.x0_0_addr0; - add0.x0_0_read_data = x.read_data; - x.addr1 = add0.x0_0_addr1; - y.addr0 = add0.y0_0_addr0; - add0.y0_0_read_data = y.read_data; - y.addr1 = add0.y0_0_addr1; - x1.addr0 = add0.x10_0_addr0; - x1.addr1 = add0.x10_0_addr1; - x1.write_data = add0.x10_0_write_data; - x1.write_en = add0.x10_0_write_en; - add0.x10_0_done = x1.done; - add0.go = 1'd1; - run_add[done] = add0.done ? 1'd1; + group run_add0 { + x.addr0 = comp_add0.x0_0_addr0; + comp_add0.x0_0_read_data = x.read_data; + x.addr1 = comp_add0.x0_0_addr1; + y.addr0 = comp_add0.y0_0_addr0; + comp_add0.y0_0_read_data = y.read_data; + y.addr1 = comp_add0.y0_0_addr1; + x1.addr0 = comp_add0.x10_0_addr0; + x1.addr1 = comp_add0.x10_0_addr1; + x1.write_data = comp_add0.x10_0_write_data; + x1.write_en = comp_add0.x10_0_write_en; + comp_add0.x10_0_done = x1.done; + comp_add0.go = 1'd1; + run_add0[done] = comp_add0.done ? 1'd1; } } control { seq { - run_add; + run_add0; } } } diff --git a/frontends/relay-futil/tests/tensor3d_divide.expect b/frontends/relay-futil/tests/tensor3d_divide.expect index 5058296dd8..0476ac946a 100644 --- a/frontends/relay-futil/tests/tensor3d_divide.expect +++ b/frontends/relay-futil/tests/tensor3d_divide.expect @@ -1,6 +1,6 @@ import "primitives/std.lib"; -component divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { +component divide0(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_read_data: 32, x10_0_0_done: 1, y0_0_0_read_data: 32, y0_0_0_done: 1) -> (done: 1, x0_0_0_addr0: 3, x0_0_0_addr1: 3, x0_0_0_addr2: 3, x0_0_0_write_data: 32, x0_0_0_write_en: 1, x0_0_0_clk: 1, x10_0_0_addr0: 3, x10_0_0_addr1: 3, x10_0_0_addr2: 3, x10_0_0_write_data: 32, x10_0_0_write_en: 1, x10_0_0_clk: 1, y0_0_0_addr0: 3, y0_0_0_addr1: 3, y0_0_0_addr2: 3, y0_0_0_write_data: 32, y0_0_0_write_en: 1, y0_0_0_clk: 1) { cells { add0 = prim std_add(3); add1 = prim std_add(3); @@ -56,7 +56,7 @@ component divide(go: 1, clk: 1, x0_0_0_read_data: 32, x0_0_0_done: 1, x10_0_0_re k0.write_en = 1'd1; let2[done] = k0.done; } - group let3<> { + group let3 { bin_read0_0.in = div_pipe0.out; bin_read0_0.write_en = div_pipe0.done; let3[done] = bin_read0_0.done; @@ -146,31 +146,31 @@ component main () -> () { x1 = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); x = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); y = prim std_mem_d3(32, 4, 5, 6, 3, 3, 3); - divide0 = divide; + comp_divide0 = divide0; } wires { - group run_divide { - x.addr0 = divide0.x0_0_0_addr0; - divide0.x0_0_0_read_data = x.read_data; - x.addr1 = divide0.x0_0_0_addr1; - x.addr2 = divide0.x0_0_0_addr2; - y.addr0 = divide0.y0_0_0_addr0; - divide0.y0_0_0_read_data = y.read_data; - y.addr1 = divide0.y0_0_0_addr1; - y.addr2 = divide0.y0_0_0_addr2; - x1.addr0 = divide0.x10_0_0_addr0; - x1.addr1 = divide0.x10_0_0_addr1; - x1.addr2 = divide0.x10_0_0_addr2; - x1.write_data = divide0.x10_0_0_write_data; - x1.write_en = divide0.x10_0_0_write_en; - divide0.x10_0_0_done = x1.done; - divide0.go = 1'd1; - run_divide[done] = divide0.done ? 1'd1; + group run_divide0 { + x.addr0 = comp_divide0.x0_0_0_addr0; + comp_divide0.x0_0_0_read_data = x.read_data; + x.addr1 = comp_divide0.x0_0_0_addr1; + x.addr2 = comp_divide0.x0_0_0_addr2; + y.addr0 = comp_divide0.y0_0_0_addr0; + comp_divide0.y0_0_0_read_data = y.read_data; + y.addr1 = comp_divide0.y0_0_0_addr1; + y.addr2 = comp_divide0.y0_0_0_addr2; + x1.addr0 = comp_divide0.x10_0_0_addr0; + x1.addr1 = comp_divide0.x10_0_0_addr1; + x1.addr2 = comp_divide0.x10_0_0_addr2; + x1.write_data = comp_divide0.x10_0_0_write_data; + x1.write_en = comp_divide0.x10_0_0_write_en; + comp_divide0.x10_0_0_done = x1.done; + comp_divide0.go = 1'd1; + run_divide0[done] = comp_divide0.done ? 1'd1; } } control { seq { - run_divide; + run_divide0; } } } diff --git a/frontends/relay-futil/utilities.py b/frontends/relay-futil/utilities.py index aab5fd2a72..58fbb1ebc1 100644 --- a/frontends/relay-futil/utilities.py +++ b/frontends/relay-futil/utilities.py @@ -6,6 +6,11 @@ NumDimensionsToPrimitive = {1: PrimitiveType.Memory1D, 2: PrimitiveType.Memory2D, 3: PrimitiveType.Memory3D, 4: PrimitiveType.Memory4D} +# Mapping between primitive type and associated Dahlia name extension. +# E.g. A 2D memory primitive named `A` will be lowered to `A0_0`. +DahliaNameExtension = {PrimitiveType.Memory1D: '0', PrimitiveType.Memory2D: '0_0', + PrimitiveType.Memory3D: '0_0_0', PrimitiveType.Memory4D: '0_0_0_0'} + def flatten(l): ''' @@ -26,8 +31,9 @@ def get_dahlia_data_type(relay_type): Gets the Dahlia data type from the given Relay type. NOTE: Currently, Dahlia does not support signed types for arrays. ''' - if 'int' in relay_type: return 'ubit' - if 'float' in relay_type: return 'ufix' + dtype = relay_type.dtype + if 'int' in dtype: return 'ubit' + if 'float' in dtype: return 'ufix' assert False, f'{relay_type} is not supported.' @@ -37,12 +43,12 @@ def get_bitwidth(relay_type): If the relay_type is floating point of size N, returns a fixed point of size . This lowers to a fixed point cell with `int_width` of size N/2, and a `fract_width` of size N/2. ''' - type = str(relay_type) - length = len(type) - if 'int' in type: return type[3:length] - if 'float' in type: - width = int(type[5:length]) - return f'{width}, {int(width / 2)}' + dtype = relay_type.dtype + length = len(dtype) + if 'int' in dtype: return dtype[3:length] + if 'float' in dtype: + width = dtype[5:length] + return f'{width}, {int(width) // 2}' assert False, f'{relay_type} is not supported.' @@ -55,64 +61,62 @@ def get_memory_parameters(type): We then parse this to determine the corresponding FuTIL and Dahlia types. ''' - t = str(type) - data_type = get_dahlia_data_type(t) - if t[0:3] == 'int' or t[0:5] == 'float': + typ = str(type) + data_type = get_dahlia_data_type(type) + + if typ[0:3] == 'int' or typ[0:5] == 'float': + # Currently, we are treating scalar values as 1D Memory primitives. return [get_bitwidth(type), 1, 1], PrimitiveType.Memory1D, data_type - assert t[0:6] == 'Tensor', f'{type} is not currently supported.' - string_type = t[t.find(")") + 3:t.find("]")] - string_dimensions = t[t.find("(") + 1:t.find(")")] + assert typ[0:6] == 'Tensor', f'{type} is not currently supported.' - tensor_dimensions = list(map(int, string_dimensions.split(','))) - data, num_dimensions = [get_bitwidth(string_type)], len(tensor_dimensions) + tensor_dimensions = type.concrete_shape + data, num_dimensions = [get_bitwidth(type)], len(tensor_dimensions) assert num_dimensions in NumDimensionsToPrimitive, f'{num_dimensions} dimensions is not supported.' for dimension in tensor_dimensions: data.append(dimension) # Size. for dimension in tensor_dimensions: data.append(int(math.log2(dimension) + 1)) # Index size. return data, NumDimensionsToPrimitive[num_dimensions], data_type -def build_main_controls(c: FComponent): +def build_main_controls(component: FComponent): ''' - Builds the wires and control for the `main` component. - This is done by creating a group run_* with its respective - wiring for each Dahlia declaration, and adding it to the - control. + Builds the wires and control for the `main` component. This is done by creating a group `run_*` + with its respective wiring for each Relay function call, and adding it to the control. ''' - dahlia_declarations = [] - for cell in reversed(c.cells): - if not cell.is_dahlia_declaration(): continue - dahlia_declarations.append(cell.dahlia_declaration) - - for declaration in dahlia_declarations: - inputs = declaration.inputs + for cell in reversed(component.cells.values()): + if not cell.is_relay_function(): continue + function = cell.relay_function + inputs, output = function.inputs, function.output wires = [] - group_name = f'run_{declaration.component_name}' + group_name = f'run_{function.component_name}' for input in flatten(inputs): prim = input.primitive - wires.append(FWire(f'{prim.name}.addr0', f'{declaration.decl_name}.{input.dahlia_name}_addr0')) + wires.append(FWire(f'{prim.name}.addr0', f'{function.name}.{input.dahlia_name}_addr0')) wires.append( - FWire(f'{declaration.decl_name}.{input.dahlia_name}_read_data', f'{prim.name}.read_data')) + FWire(f'{function.name}.{input.dahlia_name}_read_data', f'{prim.name}.read_data')) if prim.type == PrimitiveType.Memory1D: continue - wires.append(FWire(f'{prim.name}.addr1', f'{declaration.decl_name}.{input.dahlia_name}_addr1')) + wires.append(FWire(f'{prim.name}.addr1', f'{function.name}.{input.dahlia_name}_addr1')) if prim.type == PrimitiveType.Memory2D: continue - wires.append(FWire(f'{prim.name}.addr2', f'{declaration.decl_name}.{input.dahlia_name}_addr2')) - - output = declaration.output - wires.append(FWire(f'{output.primitive.name}.addr0', f'{declaration.decl_name}.{output.dahlia_name}_addr0')) - if output.primitive.type == PrimitiveType.Memory2D or output.primitive.type == PrimitiveType.Memory3D: - wires.append(FWire(f'{output.primitive.name}.addr1', f'{declaration.decl_name}.{output.dahlia_name}_addr1')) - if output.primitive.type == PrimitiveType.Memory3D: - wires.append(FWire(f'{output.primitive.name}.addr2', f'{declaration.decl_name}.{output.dahlia_name}_addr2')) - - wires.append( - FWire(f'{output.primitive.name}.write_data', f'{declaration.decl_name}.{output.dahlia_name}_write_data')) - wires.append( - FWire(f'{output.primitive.name}.write_en', f'{declaration.decl_name}.{output.dahlia_name}_write_en')) - wires.append(FWire(f'{declaration.decl_name}.{output.dahlia_name}_done', f'{output.primitive.name}.done')) - wires.append(FWire(f'{declaration.decl_name}.go', "1'd1")) - wires.append(FWire(f'{group_name}[done]', f"{declaration.decl_name}.done ? 1'd1")) - c.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) - - # Ensures that only group names make it into the controls of a component. - connections = list(filter(lambda w: w.is_group(), c.wires)) - c.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] + wires.append(FWire(f'{prim.name}.addr2', f'{function.name}.{input.dahlia_name}_addr2')) + if prim.type == PrimitiveType.Memory3D: continue + wires.append(FWire(f'{prim.name}.addr3', f'{function.name}.{input.dahlia_name}_addr3')) + + output_type, output_name = output.primitive.type, output.primitive.name + for i in range(0, 1): + wires.append(FWire(f'{output_name}.addr0', f'{function.name}.{output.dahlia_name}_addr0')) + if output_type == PrimitiveType.Memory1D: break + wires.append(FWire(f'{output_name}.addr1', f'{function.name}.{output.dahlia_name}_addr1')) + if output_type == PrimitiveType.Memory2D: break + wires.append(FWire(f'{output_name}.addr2', f'{function.name}.{output.dahlia_name}_addr2')) + if output_type == PrimitiveType.Memory3D: break + wires.append(FWire(f'{output_name}.addr3', f'{function.name}.{output.dahlia_name}_addr3')) + + wires.append(FWire(f'{output_name}.write_data', f'{function.name}.{output.dahlia_name}_write_data')) + wires.append(FWire(f'{output_name}.write_en', f'{function.name}.{output.dahlia_name}_write_en')) + wires.append(FWire(f'{function.name}.{output.dahlia_name}_done', f'{output_name}.done')) + wires.append(FWire(f'{function.name}.go', "1'd1")) + wires.append(FWire(f'{group_name}[done]', f"{function.name}.done ? 1'd1")) + component.wires.append(FConnection(group=FGroup(name=group_name, wires=wires, attributes=[]))) + + # Ensures that only group names make it into the controls of a FuTIL component. + connections = list(filter(lambda w: w.is_group(), component.wires)) + component.controls = [Seq(stmts=list(map(lambda w: w.group.name, connections)))] diff --git a/fud/fud/main.py b/fud/fud/main.py index b82e40ef6f..34d7d93ef1 100644 --- a/fud/fud/main.py +++ b/fud/fud/main.py @@ -44,6 +44,10 @@ def register_stages(registry, cfg): cfg, 'futil-noinline', '-b futil -d hole-inliner', 'Compile FuTIL to FuTIL to remove all control and inline groups' )) + registry.register( + futil.FutilStage(cfg, 'futil-externalize', '-b futil -p externalize', + 'Compile FuTIL to FuTIL to externalize all external memory primitives' + )) # Verilator registry.register( diff --git a/primitives/std.lib b/primitives/std.lib index ebac340226..8cc38c3c24 100644 --- a/primitives/std.lib +++ b/primitives/std.lib @@ -709,6 +709,28 @@ primitive std_le<"share"=1>[width](left: width, right: width) -> (out: 1) { } } +primitive std_exp(exponent: 32, go: 1, clk: 1) -> (out: 32, done: 1) { + verilog { + module std_exp + (input logic [31:0] exponent, + input logic go, + input logic clk, + output logic [31:0] out, + output logic done); + always_ff @(posedge clk) begin + if (go) begin + /* verilator lint_off REALCVT */ + out = 2.718281 ** exponent; + done = 1; + end else begin + out = 0; + done = 0; + end + end + endmodule + } +} + primitive std_sqrt(in: 32, go: 1, clk: 1) -> (out: 32, done: 1) { verilog { module std_sqrt @@ -822,7 +844,7 @@ primitive fixed_p_std_const[width, int_width, fract_width, value1, value2] () -> module fixed_p_std_const #(parameter width=32, parameter int_width = 8, - parameter fract_width= 24, + parameter fract_width = 24, parameter value1 = 0, parameter value2 = 0) @@ -1490,4 +1512,4 @@ primitive sfixed_p_std_add_dbit[width1, width2 , int_width1, fract_width1, int_w assign out = {whole_int, whole_fract}; endmodule } -} +} \ No newline at end of file