diff --git a/hls4ml/backends/__init__.py b/hls4ml/backends/__init__.py index 8b3117af7a..7ba2ad4fbb 100644 --- a/hls4ml/backends/__init__.py +++ b/hls4ml/backends/__init__.py @@ -5,14 +5,19 @@ from hls4ml.backends.vivado.vivado_backend import VivadoBackend from hls4ml.backends.vivado_accelerator.vivado_accelerator_backend import VivadoAcceleratorBackend from hls4ml.backends.vivado_accelerator.vivado_accelerator_config import VivadoAcceleratorConfig # noqa: F401 - from hls4ml.backends.catapult.catapult_backend import CatapultBackend # isort: skip - from hls4ml.backends.vitis.vitis_backend import VitisBackend # isort: skip +from hls4ml.backends.vitis_accelerator_ip_flow.vitis_accelerator_ip_flow_backend import ( + VitisAcceleratorIPFlowBackend, +) +from hls4ml.backends.vitis_accelerator_ip_flow.vitis_accelerator_ip_flow_config import ( + VitisAcceleratorIPFlowConfig, +) register_backend('Vivado', VivadoBackend) register_backend('VivadoAccelerator', VivadoAcceleratorBackend) register_backend('Vitis', VitisBackend) +register_backend('VitisAcceleratorIPFlow', VitisAcceleratorIPFlowBackend) register_backend('Quartus', QuartusBackend) register_backend('Catapult', CatapultBackend) register_backend('SymbolicExpression', SymbolicExpressionBackend) diff --git a/hls4ml/backends/vitis_accelerator_ip_flow/__init__.py b/hls4ml/backends/vitis_accelerator_ip_flow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hls4ml/backends/vitis_accelerator_ip_flow/passes/__init__.py b/hls4ml/backends/vitis_accelerator_ip_flow/passes/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hls4ml/backends/vitis_accelerator_ip_flow/supported_boards.json b/hls4ml/backends/vitis_accelerator_ip_flow/supported_boards.json new file mode 100644 index 0000000000..1279ec22d0 --- /dev/null +++ b/hls4ml/backends/vitis_accelerator_ip_flow/supported_boards.json @@ -0,0 +1,42 @@ +{ + "pynq-z2": { + "part": "xc7z020clg400-1", + "tcl_scripts": {"axi_lite": 
class VitisAcceleratorIPFlowBackend(VitisBackend):
    '''
    Backend that builds the HLS project through ``VitisBackend`` and can optionally run Vivado on the
    generated ``design.tcl`` to produce a bitfile.
    '''

    def __init__(self):
        # super(VivadoBackend, self) starts the attribute lookup *after* VivadoBackend in the MRO, so the
        # constructors of the Vitis/Vivado backends are bypassed and this backend is created under its own
        # name; layer attributes and flows are then registered explicitly below.
        # NOTE(review): this relies on VivadoBackend being part of this class's MRO - confirm.
        super(VivadoBackend, self).__init__(name='VitisAcceleratorIPFlow')
        self._register_layer_attributes()
        self._register_flows()

    def build(
        self,
        model,
        reset=False,
        csim=True,
        synth=True,
        cosim=False,
        validation=False,
        export=False,
        vsynth=False,
        # fifo_opt=False,
        bitfile=False,
    ):
        '''
        Run the parent backend build and, if requested, generate a bitfile with Vivado.

        Args:
            model: the model to build; its ``config.get_output_dir()`` is used as the project directory.
            reset, csim, synth, cosim, validation, export, vsynth: forwarded unchanged to the parent ``build``.
            bitfile: when True, run ``vivado -mode batch -source design.tcl`` inside the output directory.

        Returns:
            The result of ``parse_vivado_report`` for the model output directory.
        '''
        # NOTE: fifo_opt is not supported by this backend yet, so it is neither accepted nor forwarded.
        super().build(
            model,
            reset=reset,
            csim=csim,
            synth=synth,
            cosim=cosim,
            validation=validation,
            export=export,
            vsynth=vsynth,
        )

        if bitfile:
            curr_dir = os.getcwd()
            os.chdir(model.config.get_output_dir())
            try:
                # os.system does not raise when the command fails; failure is reported through the
                # returned status, so that is what has to be checked.
                status = os.system('vivado -mode batch -source design.tcl')
            finally:
                # Always restore the caller's working directory, even if the call is interrupted.
                os.chdir(curr_dir)
            if status != 0:
                print("Something went wrong, check the Vivado logs")

        return parse_vivado_report(model.config.get_output_dir())

    def create_initial_config(
        self,
        board='pynq-z2',
        part=None,
        clock_period=5,
        clock_uncertainty='12.5%',
        io_type='io_parallel',
        interface='axi_stream',
        driver='python',
        input_type='float',
        output_type='float',
        platform='xilinx_u250_xdma_201830_2',
    ):
        '''
        Create initial accelerator config with default parameters

        Args:
            board: one of the keys defined in supported_boards.json. ``None`` falls back to 'pynq-z2'.
            part: FPGA part forwarded to the parent ``create_initial_config``.
            clock_period: clock period passed to hls project
            clock_uncertainty: clock uncertainty forwarded to the parent ``create_initial_config``.
            io_type: io_parallel or io_stream
            interface: `axi_stream`: generate hardware designs and drivers which exploit axi stream channels.
                       `axi_master`: generate hardware designs and drivers which exploit axi master channels.
                       `axi_lite` : generate hardware designs and drivers which exploit axi lite channels. (Don't use it
                       to exchange large amount of data)
            driver: `python`: generates the python driver to use the accelerator in the PYNQ stack.
                    `c`: generates the c driver to use the accelerator bare-metal.
            input_type: the wrapper input precision. Can be `float`, `double` or an `ap_type`. Note:
                        VitisAcceleratorIPFlowConfig rounds the number of bits up to the next multiple of 8.
            output_type: the wrapper output precision. Can be `float`, `double` or an `ap_type`. Note:
                         VitisAcceleratorIPFlowConfig rounds the number of bits up to the next multiple of 8.
            platform: development target platform. Currently accepted but not stored in the config.

        Returns:
            populated config
        '''
        board = board if board is not None else 'pynq-z2'
        config = super().create_initial_config(part, clock_period, clock_uncertainty, io_type)
        config['AcceleratorConfig'] = {
            'Board': board,
            'Interface': interface,  # axi_stream, axi_master, axi_lite
            'Driver': driver,
            'Precision': {
                'Input': input_type,  # float, double or ap_fixed
                'Output': output_type,  # float, double or ap_fixed
            },
        }
        # NOTE: 'Platform' is intentionally not written yet (alveo platform selection is disabled).

        return config

    def get_default_flow(self):
        '''Return the flow registered as the default for this backend.'''
        return self._default_flow

    def get_writer_flow(self):
        '''Return the flow that writes the HLS project.'''
        return self._writer_flow

    def _register_flows(self):
        '''Register the ``write`` flow and reuse the ``vivado:ip`` flow as the default one.'''
        vivado_ip = 'vivado:ip'
        writer_passes = ['make_stamp', 'vitisacceleratoripflow:write_hls']
        self._writer_flow = register_flow('write', writer_passes, requires=[vivado_ip], backend=self.name)
        self._default_flow = vivado_ip
        # NOTE: no 'fifo_depth_optimization' flow is registered for this backend yet.
class VitisAcceleratorIPFlowConfig:
    '''
    Resolved accelerator settings (board, part, interface, driver, I/O precision) for the
    VitisAcceleratorIPFlow backend, plus the template paths derived from them.
    '''

    def __init__(self, config, model_inputs, model_outputs):
        '''
        Args:
            config: object exposing ``config`` (the configuration dict) and ``backend`` (used for
                ``convert_precision_string``).
            model_inputs: list of model input variables; exactly one is supported.
            model_outputs: list of model output variables; exactly one is supported.

        Raises:
            Exception: if the board is unknown or the ``AcceleratorConfig`` precision is incomplete.
            AssertionError: if the model has more than one input or output tensor.
        '''
        self.config = config.config
        self.board = self.config.get('AcceleratorConfig', {}).get('Board', 'pynq-z2')

        # Read the board table with a context manager so the file handle is closed deterministically.
        boards_path = os.path.join(os.path.dirname(__file__), 'supported_boards.json')
        with open(boards_path) as boards_file:
            self.supported_boards = json.load(boards_file)

        if self.board not in self.supported_boards:
            raise Exception('The board does not appear in supported_boards.json file')
        self.part = self.supported_boards[self.board]['part']

        # The part is dictated by the board; a conflicting user value is overridden with a warning.
        if self.config.get('Part') is not None:
            if self.config.get('Part') != self.part:
                print(
                    'WARNING: You set a Part that does not correspond to the Board you specified. The correct '
                    'Part is now set.'
                )
        self.config['Part'] = self.part

        accel_config = self.config.get('AcceleratorConfig', None)
        if accel_config is not None:
            prec = accel_config.get('Precision')
            if prec is None:
                raise Exception('Precision must be provided in the AcceleratorConfig')
            if prec.get('Input') is None or prec.get('Output') is None:
                raise Exception('Input and Output fields must be provided in the AcceleratorConfig->Precision')
        else:
            # No accelerator section at all: install the defaults into the shared config dict.
            accel_config = {
                'Precision': {'Input': 'float', 'Output': 'float'},
                'Driver': 'python',
                'Interface': 'axi_stream',
            }
            config.config['AcceleratorConfig'] = accel_config

        accel = self.config['AcceleratorConfig']
        self.interface = accel.get('Interface', 'axi_stream')  # axi_stream, axi_master, axi_lite
        self.driver = accel.get('Driver', 'python')  # python or c
        self.input_type = accel['Precision'].get('Input', 'float')  # float, double or ap_fixed
        self.output_type = accel['Precision'].get('Output', 'float')  # float, double or ap_fixed
        self.platform = accel.get('Platform', 'xilinx_u250_xdma_201830_2')  # Get platform folder name

        assert (
            len(model_inputs) == 1
        ), "Only models with one input tensor are currently supported by VitisAcceleratorIPFlowBackend"
        assert (
            len(model_outputs) == 1
        ), "Only models with one output tensor are currently supported by VitisAcceleratorIPFlowBackend"
        self.inp = model_inputs[0]
        self.out = model_outputs[0]
        inp_axi_t = self.input_type
        out_axi_t = self.output_type

        # Anything that is not a native floating point type is parsed and widened to a multiple of 8 bits.
        if inp_axi_t not in ['float', 'double']:
            self.input_type = self._next_factor8_type(config.backend.convert_precision_string(inp_axi_t))
        if out_axi_t not in ['float', 'double']:
            self.output_type = self._next_factor8_type(config.backend.convert_precision_string(out_axi_t))

        # Both branches compare the original *string*, never the converted precision object.
        # NOTE(review): the bit width is taken from the requested type, not from the widened one - confirm
        # this is what the writer expects.
        if inp_axi_t == 'float':
            self.input_bitwidth = 32
        elif inp_axi_t == 'double':
            self.input_bitwidth = 64
        else:
            self.input_bitwidth = config.backend.convert_precision_string(inp_axi_t).width

        if out_axi_t == 'float':
            self.output_bitwidth = 32
        elif out_axi_t == 'double':
            self.output_bitwidth = 64
        else:
            self.output_bitwidth = config.backend.convert_precision_string(out_axi_t).width

    def _next_factor8_type(self, p):
        '''Return a new type with p's width rounded up to the next multiple of 8
        Args:
            p : IntegerPrecisionType or FixedPrecisionType
        Returns:
            An IntegerPrecisionType or FixedPrecisionType with the width rounded up to the next multiple of 8.
            Other parameters (integer bits, sign, rounding and saturation modes) stay the same.
            Any other type falls through and yields None.
        '''
        W = p.width
        newW = int(np.ceil(W / 8) * 8)
        if isinstance(p, FixedPrecisionType):
            return FixedPrecisionType(newW, p.integer, p.signed, p.rounding_mode, p.saturation_mode, p.saturation_bits)
        elif isinstance(p, IntegerPrecisionType):
            return IntegerPrecisionType(newW, p.signed)

    def _template_board_dir(self):
        '''Return the template sub-folder for the board; every alveo board shares the ``alveo`` folder.'''
        return 'alveo' if self.board.startswith('alveo') else self.board

    def get_io_bitwidth(self):
        '''Return the (input, output) bit widths.'''
        return self.input_bitwidth, self.output_bitwidth

    def get_corrected_types(self):
        '''Return (input type, output type, input variable, output variable).'''
        return self.input_type, self.output_type, self.inp, self.out

    def get_interface(self):
        return self.interface

    def get_board_info(self, board=None):
        '''Return the supported_boards.json entry for ``board`` (defaults to the configured board).

        Raises:
            Exception: if the board is not listed.
        '''
        if board is None:
            board = self.board
        if board in self.supported_boards:
            return self.supported_boards[board]
        raise Exception('The board is still not supported')

    def get_part(self):
        return self.part

    def get_driver(self):
        return self.driver

    def get_board(self):
        return self.board

    def get_platform(self):
        return self.platform

    def get_clock_period(self):
        '''Return the clock period, or None when it is not configured.'''
        # ``clock_period`` is never assigned by __init__, so a plain attribute read raised AttributeError.
        # An explicitly set attribute still wins; otherwise fall back to the configuration dict.
        # NOTE(review): presumably 'ClockPeriod' is the top-level key holding the period - confirm.
        return getattr(self, 'clock_period', self.config.get('ClockPeriod'))

    def get_driver_path(self):
        '''Return the relative template path of the driver file for the board/driver/interface.'''
        return (
            '../templates/vitis_accelerator_ip_flow/'
            + self._template_board_dir()
            + '/'
            + self.driver
            + '_drivers/'
            + self.get_driver_file()
        )

    def get_driver_file(self):
        '''Return the driver file name: ``<interface>_driver.py`` for python, ``.h`` otherwise.'''
        driver_ext = '.py' if self.driver == 'python' else '.h'
        return self.interface + '_driver' + driver_ext

    def get_krnl_rtl_src_dir(self):
        '''Return the relative template path of the alveo RTL kernel sources.'''
        # Single separator only; the previous concatenation produced 'alveo//krnl_rtl_src'.
        return '../templates/vitis_accelerator_ip_flow/' + 'alveo' + '/krnl_rtl_src'

    def get_input_type(self):
        return self.input_type

    def get_output_type(self):
        return self.output_type

    def get_tcl_file_path(self):
        '''Return the relative template path of the Tcl script for the board and interface.

        Raises:
            Exception: if the board defines no Tcl scripts, or none for the configured interface.
        '''
        board_info = self.get_board_info(self.board)
        tcl_scripts = board_info.get('tcl_scripts', None)
        if tcl_scripts is None:
            raise Exception('No tcl scripts definition available for the board in supported_board.json')
        tcl_script = tcl_scripts.get(self.interface, None)
        if tcl_script is None:
            raise Exception('No tcl script definition available for the desired interface in supported_board.json')
        # Single separator only; the alveo branch used to produce 'alveo//tcl_scripts/'.
        return '../templates/vitis_accelerator_ip_flow/' + self._template_board_dir() + '/tcl_scripts/' + tcl_script
+120,7 @@ void separable_conv_2d_cl(hls::stream &data, hls::stream &res, #pragma HLS DATAFLOW hls::stream depthwise_res; - unsigned res_depth = CONFIG_T::depthwise_config::out_height * CONFIG_T::depthwise_config::out_width; + const unsigned res_depth = CONFIG_T::depthwise_config::out_height * CONFIG_T::depthwise_config::out_width; #pragma HLS STREAM variable=depthwise_res depth=res_depth depthwise_conv_2d_buffer_cl(data, depthwise_res, diff --git a/hls4ml/templates/vitis_accelerator_ip_flow/build_lib.sh b/hls4ml/templates/vitis_accelerator_ip_flow/build_lib.sh new file mode 100644 index 0000000000..262ce00d63 --- /dev/null +++ b/hls4ml/templates/vitis_accelerator_ip_flow/build_lib.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +CC=g++ +if [[ "$OSTYPE" == "linux-gnu" ]]; then + CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" +elif [[ "$OSTYPE" == "darwin"* ]]; then + CFLAGS="-O3 -fPIC -std=c++11" +fi +VITIS_ACCELERATOR_FLAGS="VITIS_ACCELERATOR_IP_FLOW" +CFLAGS="$CFLAGS -D$VITIS_ACCELERATOR_FLAGS" + +INCFLAGS="-Ifirmware/ap_types/" + +PROJECT=myproject +LIB_STAMP=mystamp + +${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o +${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}_axi.cpp -o ${PROJECT}_axi.o +${CC} ${CFLAGS} ${INCFLAGS} -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o +${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_axi.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so +rm -f *.o diff --git a/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp b/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp new file mode 100644 index 0000000000..cf6c0b9c25 --- /dev/null +++ b/hls4ml/templates/vitis_accelerator_ip_flow/myproject_axi.cpp @@ -0,0 +1,14 @@ +// hls-fpga-machine-learning insert include + +void myproject_axi(hls::stream &in, hls::stream &out) { + + // hls-fpga-machine-learning insert interface + + // hls-fpga-machine-learning insert local vars + + // hls-fpga-machine-learning insert enqueue + + // 
class NeuralNetworkOverlay(Overlay):
    """
    PYNQ overlay that streams inputs to the accelerator through ``axi_dma_0`` and reads the results back.

    The input and output DMA buffers are allocated once, in the constructor, and reused by every
    ``predict`` call.
    """

    def __init__(
        self, bitfile_name, x_shape, y_shape, dtype=np.float32, dtbo=None, download=True, ignore_version=False, device=None
    ):
        """
        Parameters:
        - bitfile_name : path of the bitfile, forwarded to `Overlay`.
        - x_shape / y_shape : shapes of the input and output DMA buffers.
        - dtype : the data type of the elements of the input/output buffers.
            Note: it should be set depending on the interface of the accelerator; if it uses 'float'
            types for the 'data' AXI-Stream field, 'np.float32' dtype is the correct one to use.
            Instead if it uses 'ap_fixed', 'np.intA' is the correct one to use (note that A cannot
            be any integer value, but it can assume {..., 8, 16, 32, ...} values. Check `numpy`
            doc for more info).
        - dtbo, download, ignore_version, device : forwarded to `Overlay`. The DMA channels are looked up
            here, so the overlay has to be loaded once the parent constructor returns.
        """
        # Forward the caller's options; they used to be replaced by hard-coded defaults.
        super().__init__(bitfile_name, dtbo=dtbo, download=download, ignore_version=ignore_version, device=device)
        # The DMA may sit inside a `hier_0` hierarchy or directly at the top level of the block design,
        # depending on how the design was generated; accept both layouts.
        # NOTE(review): the pynq-z2 Tcl script of this flow does not create `hier_0` - confirm the layout.
        dma_parent = getattr(self, 'hier_0', self)
        self.sendchannel = dma_parent.axi_dma_0.sendchannel
        self.recvchannel = dma_parent.axi_dma_0.recvchannel
        self.input_buffer = allocate(shape=x_shape, dtype=dtype)
        self.output_buffer = allocate(shape=y_shape, dtype=dtype)

    def _print_dt(self, timea, timeb, N):
        """Print and return the elapsed seconds between `timea` and `timeb` and the rate for N samples."""
        # total_seconds() also accounts for the `days` field that seconds + microseconds ignores.
        dts = (timeb - timea).total_seconds()
        # Guard against a zero interval on coarse clocks instead of raising ZeroDivisionError.
        rate = N / dts if dts > 0 else float('inf')
        print(f"Classified {N} samples in {dts} seconds ({rate} inferences / s)")
        return dts, rate

    def predict(self, X, debug=False, profile=False, encode=None, decode=None):
        """
        Obtain the predictions of the NN implemented in the FPGA.
        Parameters:
        - X : the input vector. Should be numpy ndarray matching the `x_shape` given to the constructor.
        - debug : boolean. Set it to `True` to print a message after each DMA step.
        - profile : boolean. Set it to `True` to print the performance of the algorithm in term of `inference/s`.
        - encode/decode: function pointers applied to the input before the transfer and to the output after it.
            When the accelerator uses 'ap_fixed' the encoding/decoding has to be computed by the PS. For example
            for 'ap_fixed<16,6>' type the following 2 functions are the correct one to use for encode/decode
            'float' -> 'ap_fixed<16,6>':
            ```
            def encode(xi):
                return np.int16(round(xi * 2**10)) # note 2**10 = 2**(A-B)
            def decode(yi):
                return yi * 2**-10
            encode_v = np.vectorize(encode) # to apply them element-wise
            decode_v = np.vectorize(decode)
            ```
        - return: a copy of the output buffer (decoded if `decode` is given). With `profile=True` a tuple
            `(output, seconds, rate)` is returned instead.
        """
        if profile:
            timea = datetime.now()
        if encode is not None:
            X = encode(X)
        self.input_buffer[:] = X
        self.sendchannel.transfer(self.input_buffer)
        self.recvchannel.transfer(self.output_buffer)
        if debug:
            print("Transfer OK")
        self.sendchannel.wait()
        if debug:
            print("Send OK")
        self.recvchannel.wait()
        if debug:
            print("Receive OK")

        # Work on a copy: the DMA buffer must stay bound to `self.output_buffer` for the next call, and
        # the caller must not see its result overwritten by a later prediction.
        result = self.output_buffer.copy()
        if decode is not None:
            result = decode(result)

        if profile:
            timeb = datetime.now()
            dts, rate = self._print_dt(timea, timeb, len(X))
            return result, dts, rate
        return result
+set_property -dict [list CONFIG.c_m_axi_s2mm_data_width.VALUE_SRC USER] [get_bd_cells axi_dma_0] +set_property -dict [list \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axi_s2mm_data_width {64} \ + CONFIG.c_mm2s_burst_size {32} \ + CONFIG.c_sg_length_width {26} \ +] [get_bd_cells axi_dma_0] + +startgroup +create_bd_cell -type ip -vlnv xilinx.com:hls:${project_name}_axi:1.0 ${project_name}_axi_0 +endgroup + +connect_bd_intf_net [get_bd_intf_pins axi_dma_0/M_AXIS_MM2S] [get_bd_intf_pins ${project_name}_axi_0/in_r] +connect_bd_intf_net [get_bd_intf_pins ${project_name}_axi_0/out_r] [get_bd_intf_pins axi_dma_0/S_AXIS_S2MM] + +#todo: make clock a variable +startgroup +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/processing_system7_0/M_AXI_GP0} Slave {/axi_dma_0/S_AXI_LITE} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins axi_dma_0/S_AXI_LITE] +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/axi_dma_0/M_AXI_MM2S} Slave {/processing_system7_0/S_AXI_HP0} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins processing_system7_0/S_AXI_HP0] +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/axi_dma_0/M_AXI_S2MM} Slave {/processing_system7_0/S_AXI_HP2} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins processing_system7_0/S_AXI_HP2] +apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/processing_system7_0/FCLK_CLK0 (50 MHz)} Freq {100} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins myproject_axi_0/ap_clk] +endgroup + +validate_bd_design + +open_bd_design {./${project_name}_vitis_accelerator_ip_flow/project_1.srcs/sources_1/bd/design_1/design_1.bd} + +make_wrapper -files [get_files 
class NeuralNetworkOverlay(Overlay):
    """
    PYNQ overlay that streams inputs to the accelerator through ``axi_dma_0`` and reads the results back.

    DMA buffers are allocated per ``predict`` call and released when the call returns.
    """

    def __init__(self, bitfile_name, dtbo=None, download=True, ignore_version=False, device=None):
        """Load the overlay; every option is forwarded to `Overlay`."""
        # Forward the caller's options; they used to be replaced by hard-coded defaults.
        super().__init__(bitfile_name, dtbo=dtbo, download=download, ignore_version=ignore_version, device=device)

    def _print_dt(self, timea, timeb, N):
        """Print and return the elapsed seconds between `timea` and `timeb` and the rate for N samples."""
        # total_seconds() also accounts for the `days` field that seconds + microseconds ignores.
        dts = (timeb - timea).total_seconds()
        # Guard against a zero interval on coarse clocks instead of raising ZeroDivisionError.
        rate = N / dts if dts > 0 else float('inf')
        print(f"Classified {N} samples in {dts} seconds ({rate} inferences / s)")
        return dts, rate

    @staticmethod
    def reset_PL():
        """Reset the programmable logic via `PL.reset()`."""
        # Declared static: the method takes no `self`, so calling it on an instance used to raise
        # TypeError. Calling it on the class keeps working as before.
        PL.reset()

    def predict(self, X, y_shape, dtype=np.float32, debug=None, profile=False, encode=None, decode=None):
        """
        Obtain the predictions of the NN implemented in the FPGA.
        Parameters:
        - X : the input vector. Should be numpy ndarray.
        - y_shape : the shape of the output vector. Needed to the accelerator to set the TLAST bit properly and
            for sizing the output vector shape.
        - dtype : the data type of the elements of the input/output vectors.
            Note: it should be set depending on the interface of the accelerator; if it uses 'float'
            types for the 'data' AXI-Stream field, 'np.float32' dtype is the correct one to use.
            Instead if it uses 'ap_fixed', 'np.intA' is the correct one to use (note that A cannot
            be any integer value, but it can assume {..., 8, 16, 32, ...} values. Check `numpy`
            doc for more info).
            In this case the encoding/decoding has to be computed by the PS. For example for
            'ap_fixed<16,6>' type the following 2 functions are the correct one to use for encode/decode
            'float' -> 'ap_fixed<16,6>':
            ```
            def encode(xi):
                return np.int16(round(xi * 2**10)) # note 2**10 = 2**(A-B)
            def decode(yi):
                return yi * 2**-10
            encode_v = np.vectorize(encode) # to apply them element-wise
            decode_v = np.vectorize(decode)
            ```
        - debug : set it to a true value to print a message after each DMA step.
        - profile : boolean. Set it to `True` to print the performance of the algorithm in term of `inference/s`.
            Only the DMA transfers are timed; encoding, decoding and buffer allocation are excluded.
        - encode/decode: function pointers. See `dtype` section for more information.
        - return: an output array based on `np.ndarray` with a shape equal to `y_shape` and a `dtype` equal to
            the namesake parameter. With `profile=True` a tuple `(output, seconds, rate)` is returned instead.
        """

        if encode is not None:
            X = encode(X)
        with allocate(shape=X.shape, dtype=dtype) as input_buffer, allocate(shape=y_shape, dtype=dtype) as output_buffer:
            input_buffer[:] = X

            if profile:
                timea = datetime.now()

            self.axi_dma_0.sendchannel.transfer(input_buffer)
            self.axi_dma_0.recvchannel.transfer(output_buffer)
            if debug:
                print("Transfer OK")
            self.axi_dma_0.sendchannel.wait()
            if debug:
                print("Send OK")
            self.axi_dma_0.recvchannel.wait()

            if profile:
                timeb = datetime.now()

            if debug:
                print("Receive OK")

            # Copy before leaving the `with` block, which releases the DMA buffers.
            result = output_buffer.copy()

        if decode is not None:
            result = decode(result)

        if profile:
            dts, rate = self._print_dt(timea, timeb, len(X))
            return result, dts, rate

        return result
CONFIG.PSU__USE__S_AXI_GP2 {1} \ + CONFIG.PSU__USE__S_AXI_GP4 {1} \ +] [get_bd_cells zynq_ultra_ps_e_1] + +startgroup +create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 axi_dma_0 +endgroup + +set_property -dict [list CONFIG.c_m_axi_s2mm_data_width.VALUE_SRC USER] [get_bd_cells axi_dma_0] +set_property -dict [list \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axi_s2mm_data_width {64} \ + CONFIG.c_mm2s_burst_size {32} \ + CONFIG.c_sg_length_width {26} \ +] [get_bd_cells axi_dma_0] + +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/zynq_ultra_ps_e_1/M_AXI_HPM0_FPD} Slave {/axi_dma_0/S_AXI_LITE} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins axi_dma_0/S_AXI_LITE] +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/axi_dma_0/M_AXI_MM2S} Slave {/zynq_ultra_ps_e_1/S_AXI_HP0_FPD} ddr_seg {Auto} intc_ip {New AXI SmartConnect} master_apm {0}} [get_bd_intf_pins zynq_ultra_ps_e_1/S_AXI_HP0_FPD] +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/axi_dma_0/M_AXI_S2MM} Slave {/zynq_ultra_ps_e_1/S_AXI_HP2_FPD} ddr_seg {Auto} intc_ip {New AXI SmartConnect} master_apm {0}} [get_bd_intf_pins zynq_ultra_ps_e_1/S_AXI_HP2_FPD] +apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {/zynq_ultra_ps_e_1/pl_clk0 (99 MHz)} Clk_xbar {/zynq_ultra_ps_e_1/pl_clk0 (99 MHz)} Master {/zynq_ultra_ps_e_1/M_AXI_HPM1_FPD} Slave {/axi_dma_0/S_AXI_LITE} ddr_seg {Auto} intc_ip {/ps8_0_axi_periph} master_apm {0}} [get_bd_intf_pins zynq_ultra_ps_e_1/M_AXI_HPM1_FPD] + +startgroup +create_bd_cell -type ip -vlnv xilinx.com:hls:${project_name}_axi:1.0 ${project_name}_axi_0 +endgroup + +connect_bd_intf_net [get_bd_intf_pins axi_dma_0/M_AXIS_MM2S] [get_bd_intf_pins ${project_name}_axi_0/in_r] 
+connect_bd_intf_net [get_bd_intf_pins axi_dma_0/S_AXIS_S2MM] [get_bd_intf_pins ${project_name}_axi_0/out_r] + +apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ultra_ps_e_1/pl_clk0 (99 MHz)} Freq {100} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins ${project_name}_axi_0/ap_clk] + +make_wrapper -files [get_files ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/design_1.bd] -top + +add_files -norecurse ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/hdl/design_1_wrapper.v + +reset_run impl_1 +reset_run synth_1 +launch_runs impl_1 -to_step write_bitstream -jobs 6 +wait_on_run -timeout 360 impl_1 + +open_run impl_1 +report_utilization -file util.rpt -hierarchical -hierarchical_percentages diff --git a/hls4ml/templates/vivado/ap_types/ap_axi_sdata.h b/hls4ml/templates/vivado/ap_types/ap_axi_sdata.h new file mode 100755 index 0000000000..e01c8a8cd1 --- /dev/null +++ b/hls4ml/templates/vivado/ap_types/ap_axi_sdata.h @@ -0,0 +1,441 @@ +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 +/***************************************************************************** + * + * Author: Xilinx, Inc. + * + * This text contains proprietary, confidential information of + * Xilinx, Inc. , is distributed by under license from Xilinx, + * Inc., and may be used, copied and/or disclosed only pursuant to + * the terms of a valid license agreement with Xilinx, Inc. + * + * XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" + * AS A COURTESY TO YOU, SOLELY FOR USE IN DEVELOPING PROGRAMS AND + * SOLUTIONS FOR XILINX DEVICES. BY PROVIDING THIS DESIGN, CODE, + * OR INFORMATION AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, + * APPLICATION OR STANDARD, XILINX IS MAKING NO REPRESENTATION + * THAT THIS IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT, + * AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE + * FOR YOUR IMPLEMENTATION. 
XILINX EXPRESSLY DISCLAIMS ANY + * WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE + * IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR + * REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF + * INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. + * + * Xilinx products are not intended for use in life support appliances, + * devices, or systems. Use in such applications is expressly prohibited. + * +#- (c) Copyright 2011-2022 Xilinx, Inc. All rights reserved. +#- +#- This file contains confidential and proprietary information +#- of Xilinx, Inc. and is protected under U.S. and +#- international copyright and other intellectual property +#- laws. +#- +#- DISCLAIMER +#- This disclaimer is not a license and does not grant any +#- rights to the materials distributed herewith. Except as +#- otherwise provided in a valid license issued to you by +#- Xilinx, and to the maximum extent permitted by applicable +#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +#- (2) Xilinx shall not be liable (whether in contract or tort, +#- including negligence, or under any other theory of +#- liability) for any loss or damage of any kind or nature +#- related to, arising under or in connection with these +#- materials, including for any direct, or any indirect, +#- special, incidental, or consequential loss or damage +#- (including loss of data, profits, goodwill, or any type of +#- loss or damage suffered as a result of any action brought +#- by a third party) even if such damage or loss was +#- reasonably foreseeable or Xilinx had been advised of the +#- possibility of the same. 
+#- +#- CRITICAL APPLICATIONS +#- Xilinx products are not designed or intended to be fail- +#- safe, or for use in any application requiring fail-safe +#- performance, such as life-support or safety devices or +#- systems, Class III medical devices, nuclear facilities, +#- applications related to the deployment of airbags, or any +#- other applications that could lead to death, personal +#- injury, or severe property or environmental damage +#- (individually and collectively, "Critical +#- Applications"). Customer assumes the sole risk and +#- liability of any use of Xilinx products in Critical +#- Applications, subject only to applicable laws and +#- regulations governing limitations on product liability. +#- +#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +#- PART OF THIS FILE AT ALL TIMES. +#- ************************************************************************ + + * + *****************************************************************************/ + +/* + * This file contains the definition of the data types for AXI streaming. + * ap_axi_s is a signed interpretation of the AXI stream + * ap_axi_u is an unsigned interpretation of the AXI stream + */ + +#ifndef __AP__AXI_SDATA__ +#define __AP__AXI_SDATA__ + +#include +#include "ap_int.h" +//#include "ap_fixed.h" +template +struct ap_fixed; +template +struct ap_ufixed; + +namespace hls { + +template constexpr std::size_t bitwidth = sizeof(T) * CHAR_BIT; + +template constexpr std::size_t bitwidth> = W; +template constexpr std::size_t bitwidth> = W; +template +constexpr std::size_t bitwidth> = _AP_W; +template +constexpr std::size_t bitwidth> = _AP_W; + +template +constexpr std::size_t bytewidth = (bitwidth + CHAR_BIT - 1) / CHAR_BIT; + +template struct axis { + static constexpr std::size_t NewWUser = (WUser == 0) ? 1 : WUser; + static constexpr std::size_t NewWId = (WId == 0) ? 1 : WId; + static constexpr std::size_t NewWDest = (WDest == 0) ? 
1 : WDest; + T data; + ap_uint> keep; + ap_uint> strb; + ap_uint user; + ap_uint<1> last; + ap_uint id; + ap_uint dest; + + ap_uint *get_user_ptr() { +#pragma HLS inline + return (WUser == 0) ? nullptr : &user; + } + ap_uint *get_id_ptr() { +#pragma HLS inline + return (WId == 0) ? nullptr : &id; + } + ap_uint *get_dest_ptr() { +#pragma HLS inline + return (WDest == 0) ? nullptr : &dest; + } +}; + +} // namespace hls + +template +using ap_axis = hls::axis, WUser, WId, WDest>; + +template +using ap_axiu = hls::axis, WUser, WId, WDest>; + +// Isolate out qdma_axis from hls::axis for special APIs. +template +struct qdma_axis; + +template struct qdma_axis { + // private: + static constexpr std::size_t kBytes = (WData + 7) / 8; + + ap_uint data; + ap_uint keep; + ap_uint<1> strb; + ap_uint<1> user; + ap_uint<1> last; + ap_uint<1> id; + ap_uint<1> dest; + + ap_uint<1> *get_strb_ptr() { +#pragma HLS inline + return nullptr; + } + ap_uint<1> *get_user_ptr() { +#pragma HLS inline + return nullptr; + } + ap_uint<1> *get_id_ptr() { +#pragma HLS inline + return nullptr; + } + ap_uint<1> *get_dest_ptr() { +#pragma HLS inline + return nullptr; + } + + // public: + ap_uint get_data() const { +#pragma HLS inline + return data; + } + ap_uint get_keep() const { +#pragma HLS inline + return keep; + } + ap_uint<1> get_last() const { +#pragma HLS inline + return last; + } + + void set_data(const ap_uint &d) { +#pragma HLS inline + data = d; + } + void set_keep(const ap_uint &k) { +#pragma HLS inline + keep = k; + } + void set_last(const ap_uint<1> &l) { +#pragma HLS inline + last = l; + } + void keep_all() { +#pragma HLS inline + ap_uint k = 0; + keep = ~k; + } + + qdma_axis() { +#pragma HLS inline + ; + } + qdma_axis(ap_uint d) : data(d) { +#pragma HLS inline + ; + } + qdma_axis(ap_uint d, ap_uint k) : data(d), keep(k) { +#pragma HLS inline + ; + } + qdma_axis(ap_uint d, ap_uint k, ap_uint<1> l) + : data(d), keep(k), last(l) { +#pragma HLS inline + ; + } + qdma_axis(const qdma_axis 
&d) + : data(d.data), keep(d.keep), last(d.last) { +#pragma HLS inline + ; + } + qdma_axis &operator=(const qdma_axis &d) { +#pragma HLS inline + data = d.data; + keep = d.keep; + last = d.last; + return *this; + } +}; + +#ifdef AESL_SYN +#if ((__clang_major__ != 3) || (__clang_minor__ != 1)) +#include "hls_stream.h" +namespace hls { + +template +class stream> final { + typedef axis __STREAM_T__; + +public: + /// Constructors + INLINE stream() {} + + INLINE stream(const char *name) { (void)name; } + + /// Make copy constructor and assignment operator private +private: + INLINE stream(const stream<__STREAM_T__> &chn) : V(chn.V) {} + +public: + /// Overload >> and << operators to implement read() and write() + INLINE void operator>>(__STREAM_T__ &rdata) { read(rdata); } + + INLINE void operator<<(const __STREAM_T__ &wdata) { write(wdata); } + + /// empty & full + bool empty() { +#pragma HLS inline + bool tmp = __fpga_axis_valid(&V.data, &V.keep, &V.strb, V.get_user_ptr(), + &V.last, V.get_id_ptr(), V.get_dest_ptr()); + return !tmp; + } + + bool full() { +#pragma HLS inline + bool tmp = __fpga_axis_ready(&V.data, &V.keep, &V.strb, V.get_user_ptr(), + &V.last, V.get_id_ptr(), V.get_dest_ptr()); + return !tmp; + } + + /// Blocking read + void read(__STREAM_T__ &dout) { +#pragma HLS inline + __STREAM_T__ tmp; + __fpga_axis_pop(&V.data, &V.keep, &V.strb, V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, &tmp.keep, + &tmp.strb, tmp.get_user_ptr(), &tmp.last, tmp.get_id_ptr(), + tmp.get_dest_ptr()); + dout = tmp; + } + + __STREAM_T__ read() { +#pragma HLS inline + __STREAM_T__ tmp; + __fpga_axis_pop(&V.data, &V.keep, &V.strb, V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, &tmp.keep, + &tmp.strb, tmp.get_user_ptr(), &tmp.last, tmp.get_id_ptr(), + tmp.get_dest_ptr()); + return tmp; + } + + /// Blocking write + void write(const __STREAM_T__ &din) { +#pragma HLS inline + __STREAM_T__ tmp = din; + __fpga_axis_push(&V.data, 
&V.keep, &V.strb, V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, &tmp.keep, + &tmp.strb, tmp.get_user_ptr(), &tmp.last, tmp.get_id_ptr(), + tmp.get_dest_ptr()); + } + + /// Non-Blocking read + bool read_nb(__STREAM_T__ &dout) { +#pragma HLS inline + __STREAM_T__ tmp; + if (__fpga_axis_nb_pop(&V.data, &V.keep, &V.strb, V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, + &tmp.keep, &tmp.strb, tmp.get_user_ptr(), + &tmp.last, tmp.get_id_ptr(), tmp.get_dest_ptr())) { + dout = tmp; + return true; + } else { + return false; + } + } + + /// Non-Blocking write + bool write_nb(const __STREAM_T__ &in) { +#pragma HLS inline + __STREAM_T__ tmp = in; + bool full_n = __fpga_axis_nb_push( + &V.data, &V.keep, &V.strb, V.get_user_ptr(), &V.last, V.get_id_ptr(), + V.get_dest_ptr(), &tmp.data, &tmp.keep, &tmp.strb, tmp.get_user_ptr(), + &tmp.last, tmp.get_id_ptr(), tmp.get_dest_ptr()); + return full_n; + } + +private: + __STREAM_T__ V NO_CTOR; +}; + +// specialization for qdma +template +class stream> { + typedef qdma_axis __STREAM_T__; + +public: + /// Constructors + INLINE stream() {} + + INLINE stream(const char *name) { (void)name; } + + /// Make copy constructor and assignment operator private +private: + INLINE stream(const stream<__STREAM_T__> &chn) : V(chn.V) {} + +public: + /// Overload >> and << operators to implement read() and write() + INLINE void operator>>(__STREAM_T__ &rdata) { read(rdata); } + + INLINE void operator<<(const __STREAM_T__ &wdata) { write(wdata); } + + /// empty & full + bool empty() { +#pragma HLS inline + bool tmp = __fpga_axis_valid(&V.data, &V.keep, V.get_strb_ptr(), V.get_user_ptr(), + &V.last, V.get_id_ptr(), V.get_dest_ptr()); + return !tmp; + } + + bool full() { +#pragma HLS inline + bool tmp = __fpga_axis_ready(&V.data, &V.keep, V.get_strb_ptr(), V.get_user_ptr(), + &V.last, V.get_id_ptr(), V.get_dest_ptr()); + return !tmp; + } + + /// Blocking read + void read(__STREAM_T__ &dout) { +#pragma 
HLS inline + __STREAM_T__ tmp; + __fpga_axis_pop(&V.data, &V.keep, V.get_strb_ptr(), V.get_user_ptr(), + &V.last, V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, + &tmp.keep, tmp.get_strb_ptr(), tmp.get_user_ptr(), + &tmp.last, tmp.get_id_ptr(), tmp.get_dest_ptr()); + dout = tmp; + } + + __STREAM_T__ read() { +#pragma HLS inline + __STREAM_T__ tmp; + __fpga_axis_pop(&V.data, &V.keep, V.get_strb_ptr(), V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, &tmp.keep, + tmp.get_strb_ptr(), tmp.get_user_ptr(), &tmp.last, tmp.get_id_ptr(), + tmp.get_dest_ptr()); + return tmp; + } + + /// Blocking write + void write(const __STREAM_T__ &din) { +#pragma HLS inline + __STREAM_T__ tmp = din; + __fpga_axis_push(&V.data, &V.keep, V.get_strb_ptr(), V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, &tmp.keep, + tmp.get_strb_ptr(), tmp.get_user_ptr(), &tmp.last, tmp.get_id_ptr(), + tmp.get_dest_ptr()); + } + + /// Non-Blocking read + bool read_nb(__STREAM_T__ &dout) { +#pragma HLS inline + __STREAM_T__ tmp; + + if (__fpga_axis_nb_pop(&V.data, &V.keep, &V.strb, V.get_user_ptr(), &V.last, + V.get_id_ptr(), V.get_dest_ptr(), &tmp.data, + &tmp.keep, &tmp.strb, tmp.get_user_ptr(), + &tmp.last, tmp.get_id_ptr(), tmp.get_dest_ptr())) { + dout = tmp; + return true; + } else { + return false; + } + } + + /// Non-Blocking write + bool write_nb(const __STREAM_T__ &in) { +#pragma HLS inline + __STREAM_T__ tmp = in; + bool full_n = __fpga_axis_nb_push( + &V.data, &V.keep, V.get_strb_ptr(), V.get_user_ptr(), &V.last, V.get_id_ptr(), + V.get_dest_ptr(), &tmp.data, &tmp.keep, tmp.get_strb_ptr(), tmp.get_user_ptr(), + &tmp.last, tmp.get_id_ptr(), tmp.get_dest_ptr()); + return full_n; + } + +private: + __STREAM_T__ V NO_CTOR; +}; + +} // namespace hls +#endif +#endif +#endif diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl index af37b0f4aa..50596091f2 100644 --- a/hls4ml/templates/vivado/build_prj.tcl +++ 
b/hls4ml/templates/vivado/build_prj.tcl @@ -206,7 +206,7 @@ if {$opt(cosim)} { set time_end [clock clicks -milliseconds] puts "INFO:" - if {[string equal "$backend" "vivadoaccelerator"]} { + if {[string equal "$backend" "vivadoaccelerator"] || [string equal $backend "vitisacceleratoripflow"]} { puts [read [open ${project_name}_prj/solution1/sim/report/${project_name}_axi_cosim.rpt r]] } else { puts [read [open ${project_name}_prj/solution1/sim/report/${project_name}_cosim.rpt r]] diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h b/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h index b8c2a48d19..2a695d4e5a 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h @@ -11,6 +11,11 @@ #include #include +// this header cannot be included by Vivado HLS +// "VITIS_ACCELERATOR_IP_FLOW" is defined on the build_lib.sh of the `Vitis Accelerator` template files +#ifdef VITIS_ACCELERATOR_IP_FLOW +#include "ap_axi_sdata.h" +#endif namespace nnet { #ifndef __SYNTHESIS__ @@ -161,6 +166,26 @@ template void convert_data(hls::stre } } +#ifdef VITIS_ACCELERATOR_IP_FLOW +// todo avoid hardcoding hls::axis and use template +template +void convert_data(srcType *src, hls::stream> &dst) { + for (size_t i = 0; i < SIZE; i++) { + hls::axis ctype; + ctype.data = dstType(src[i]); + dst.write(ctype); + } +} + +template +void convert_data(hls::stream> &src, dstType *dst) { + for (size_t i = 0; i < SIZE; i++) { + hls::axis ctype = src.read(); + dst[i] = dstType(ctype.data); + } +} +#endif + extern bool trace_enabled; extern std::map *trace_outputs; extern size_t trace_type_size; @@ -247,8 +272,6 @@ template void save_layer_output(hls::stream &data, const } } -#endif - template void copy_data(std::vector src, dst_T dst[SIZE]) { typename std::vector::const_iterator in_begin = src.cbegin() + OFFSET; typename std::vector::const_iterator in_end = in_begin + SIZE; @@ -272,14 +295,27 @@ void copy_data(std::vector src, 
hls::stream &dst) { } template void copy_data_axi(std::vector src, dst_T dst[SIZE]) { - for (auto i = 0; i < SIZE; i++) + for (auto i = 0; i < SIZE; i++) { + dst[i].data = src[i]; if (i == SIZE - 1) { - dst[i].data = src[i]; dst[i].last = 1; } else { - dst[i].data = src[i]; dst[i].last = 0; } + } +} + +template void copy_data_axi(std::vector src, hls::stream &dst) { + for (auto i = 0; i < SIZE; i++) { + dst_T pack; + pack.data = src[i]; + if (i == SIZE - 1) { + pack.last = 1; + } else { + pack.last = 0; + } + dst.write(pack); + } } template void print_result(res_T result[SIZE], std::ostream &out, bool keep = false) { @@ -289,21 +325,55 @@ template void print_result(res_T result[SIZE], std::o out << std::endl; } -template void print_result(hls::stream &result, std::ostream &out, bool keep = false) { +template ::value, int>::type = 0> +void print_result(hls::stream &result, std::ostream &out, bool keep = false) { for (int i = 0; i < SIZE / res_T::size; i++) { res_T res_pack = result.read(); for (int j = 0; j < res_T::size; j++) { out << res_pack[j] << " "; } - if (keep) + if (keep) { + result.write(res_pack); + } + } + out << std::endl; +} + +// compatible with Vitis Accelerator for res_T = hls::axis<...> and io_parallel +template ::value, int>::type = 0> +void print_result(hls::stream &result, std::ostream &out, bool keep = false) { + for (int i = 0; i < SIZE; i++) { + res_T res_pack = result.read(); + + out << res_pack.data << " "; + + if (keep) { result.write(res_pack); + } + } + out << std::endl; +} + +// compatible with Vitis Accelerator for res_T = hls::axis and io_stream +template +void print_result(hls::stream &result, std::ostream &out, bool keep = false) { + for (int i = 0; i < SIZE / underlying_res_T::size; i++) { + res_T res_pack; + for (int j = 0; j < underlying_res_T::size; j++) { + res_pack = result.read(); + out << res_pack.data << " "; + if (keep) { + result.write(res_pack); + } + } } out << std::endl; } template void fill_zero(data_T data[SIZE]) { 
std::fill_n(data, SIZE, 0.); } -template void fill_zero(hls::stream &data) { +template ::value, int>::type = 0> +void fill_zero(hls::stream &data) { for (int i = 0; i < SIZE / data_T::size; i++) { data_T data_pack; for (int j = 0; j < data_T::size; j++) { @@ -313,6 +383,36 @@ template void fill_zero(hls::stream &data) { } } +template ::value, int>::type = 0> +void fill_zero(hls::stream &data) { + for (int i = 0; i < SIZE; i++) { + data_T data_pack; + data_pack.data = 0.; + if (i == SIZE - 1) { + data_pack.last = 1; + } else { + data_pack.last = 0; + } + data.write(data_pack); + } +} + +// compatible with Vitis Accelerator for res_T = hls::axis +template void fill_zero(hls::stream &data) { + for (int i = 0; i < SIZE / underlying_data_T::size; i++) { + data_T data_pack; + for (int j = 0; j < underlying_data_T::size; j++) { + data_pack.data = 0.; + if ((i == (SIZE / underlying_data_T::size - 1)) && (j == (underlying_data_T::size - 1))) { + data_pack.last = 1; + } else { + data_pack.last = 0; + } + data.write(data_pack); + } + } +} + template int read_file_1D(const char *filename, dataType data[nrows]) { FILE *fp; fp = fopen(filename, "r"); @@ -370,6 +470,7 @@ template void hls_stream_debug(hls::stream &dat res << datareg; } } +#endif constexpr int ceillog2(int x) { return (x <= 2) ? 
1 : 1 + ceillog2((x + 1) / 2); } diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv1d_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv1d_stream.h index ca3143d01e..11622efbf0 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv1d_stream.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv1d_stream.h @@ -109,7 +109,7 @@ void separable_conv_1d_cl(hls::stream &data, hls::stream &res, #pragma HLS DATAFLOW hls::stream depthwise_res; - unsigned res_depth = CONFIG_T::depthwise_config::out_width; + const unsigned res_depth = CONFIG_T::depthwise_config::out_width; #pragma HLS STREAM variable=depthwise_res depth=res_depth depthwise_conv_1d_cl(data, depthwise_res, depthwise_weights, diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv2d_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv2d_stream.h index 7f4dd866c9..f5cafd2ee7 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv2d_stream.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv2d_stream.h @@ -133,7 +133,7 @@ void separable_conv_2d_cl(hls::stream &data, hls::stream &res, #pragma HLS DATAFLOW hls::stream depthwise_res; - unsigned res_depth = CONFIG_T::depthwise_config::out_height * CONFIG_T::depthwise_config::out_width; + const unsigned res_depth = CONFIG_T::depthwise_config::out_height * CONFIG_T::depthwise_config::out_width; #pragma HLS STREAM variable=depthwise_res depth=res_depth depthwise_conv_2d_cl(data, depthwise_res, depthwise_weights, diff --git a/hls4ml/writer/__init__.py b/hls4ml/writer/__init__.py index f16cccc9fa..31238b18c8 100644 --- a/hls4ml/writer/__init__.py +++ b/hls4ml/writer/__init__.py @@ -1,6 +1,7 @@ from hls4ml.writer.catapult_writer import CatapultWriter from hls4ml.writer.quartus_writer import QuartusWriter from hls4ml.writer.symbolic_writer import SymbolicExpressionWriter +from hls4ml.writer.vitis_accelerator_ip_flow_writer import VitisAcceleratorIPFlowWriter from hls4ml.writer.vitis_writer import VitisWriter from 
hls4ml.writer.vivado_accelerator_writer import VivadoAcceleratorWriter from hls4ml.writer.vivado_writer import VivadoWriter @@ -9,6 +10,7 @@ register_writer('Vivado', VivadoWriter) register_writer('VivadoAccelerator', VivadoAcceleratorWriter) register_writer('Vitis', VitisWriter) +register_writer('VitisAcceleratorIPFlow', VitisAcceleratorIPFlowWriter) register_writer('Quartus', QuartusWriter) register_writer('Catapult', CatapultWriter) register_writer('SymbolicExpression', SymbolicExpressionWriter) diff --git a/hls4ml/writer/vitis_accelerator_ip_flow_writer.py b/hls4ml/writer/vitis_accelerator_ip_flow_writer.py new file mode 100644 index 0000000000..78e1fa982d --- /dev/null +++ b/hls4ml/writer/vitis_accelerator_ip_flow_writer.py @@ -0,0 +1,387 @@ +import os +from distutils.dir_util import copy_tree +from shutil import copyfile + +# from hls4ml.writer.vivado_writer import VivadoWriter +from hls4ml.writer.vitis_writer import VitisWriter + + +class VitisAcceleratorIPFlowWriter(VitisWriter): + def __init__(self): + super().__init__() + self.vitis_accelerator_ip_flow_config = None + + def write_axi_wrapper(self, model): + '''Write a top level HLS C++ file to wrap the hls4ml project with AXI interfaces + Args: + model : The ModelGraph to write the wrapper for + ''' + inp_axi_t, out_axi_t, inp, out = self.vitis_accelerator_ip_flow_config.get_corrected_types() + indent = ' ' + + ####################### + # myproject_axi.h + ####################### + + filedir = os.path.dirname(os.path.abspath(__file__)) + f = open(os.path.join(filedir, '../templates/vitis_accelerator_ip_flow/myproject_axi.h')) + fout = open(f'{model.config.get_output_dir()}/firmware/{model.config.get_project_name()}_axi.h', 'w') + + for line in f.readlines(): + if 'MYPROJECT' in line: + newline = line.replace('MYPROJECT', format(model.config.get_project_name().upper())) + elif '// hls-fpga-machine-learning insert include' in line: + newline = f'#include "{model.config.get_project_name()}.h"\n' + newline += 
'#include "ap_axi_sdata.h"\n' + elif 'myproject' in line: + newline = line.replace('myproject', model.config.get_project_name()) + elif '// hls-fpga-machine-learning insert definitions' in line: + newline = '' + newline += f'static const unsigned N_IN = {inp.size()};\n' + newline += f'static const unsigned N_OUT = {out.size()};\n' + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + newline += 'typedef hls::axis my_pkt;\n' + # might need to make "float" a variable according to the + # configuration set by the user and the DMA available data widths + else: # TODO: handle this case + newline += f'typedef {inp_axi_t} input_axi_t;\n' + newline += f'typedef {out_axi_t} output_axi_t;\n' + else: + newline = line + fout.write(newline) + f.close() + fout.close() + + ####################### + # myproject_axi.cpp + ####################### + + f = open(os.path.join(filedir, '../templates/vitis_accelerator_ip_flow/myproject_axi.cpp')) + fout = open(f'{model.config.get_output_dir()}/firmware/{model.config.get_project_name()}_axi.cpp', 'w') + + io_type = model.config.get_config_value("IOType") + + for line in f.readlines(): + if 'myproject' in line: + newline = line.replace('myproject', model.config.get_project_name()) + elif '// hls-fpga-machine-learning insert include' in line: + newline = f'#include "{model.config.get_project_name()}_axi.h"\n' + elif '// hls-fpga-machine-learning insert local vars' in line: + newline = '' + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + newline += indent + 'bool is_last = false;\n' + if io_type == 'io_parallel': # TODO: handle io_parallel + newline += indent + inp.type.name + ' in_local[N_IN];\n' + newline += indent + out.type.name + ' out_local[N_OUT];\n' + newline += indent + 'my_pkt tmp;\n' + elif io_type == 'io_stream': + newline += indent + 'hls::stream<' + inp.type.name + '> in_local("input_1");\n' + newline += indent + 'hls::stream<' + out.type.name + '> out_local("output_1");\n\n' + 
newline += indent + '#pragma HLS STREAM variable=in_local depth={}\n'.format( + model.get_input_variables()[0].pragma[1] + ) + newline += indent + '#pragma HLS STREAM variable=out_local depth={}\n'.format( + model.get_output_variables()[0].pragma[1] + ) + elif '// hls-fpga-machine-learning insert call' in line: + newline = indent + f'{model.config.get_project_name()}(in_local, out_local);\n' + elif '// hls-fpga-machine-learning insert interface' in line: + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_lite': # TODO: handle axi_lite + newline = '' + newline += indent + '#pragma HLS INTERFACE ap_ctrl_none port=return\n' + newline += indent + '#pragma HLS INTERFACE s_axilite port=in\n' + newline += indent + '#pragma HLS INTERFACE s_axilite port=out\n' + elif self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_master': # TODO: handle axi_master + newline = '' + newline += indent + '#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS\n' + newline += indent + '#pragma HLS INTERFACE m_axi depth={} port=in offset=slave bundle=IN_BUS\n'.format( + model.get_input_variables()[0].pragma[1] + ) + newline += indent + '#pragma HLS INTERFACE m_axi depth={} port=out offset=slave bundle=OUT_BUS\n'.format( + model.get_output_variables()[0].pragma[1] + ) + elif self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + newline = '' + newline += indent + '#pragma HLS INTERFACE axis port=in\n' + newline += indent + '#pragma HLS INTERFACE axis port=out\n' + newline += indent + '#pragma HLS INTERFACE ap_ctrl_none port=return\n' + if model.config.get_config_value("IOType") == 'io_stream': + newline += indent + '#pragma HLS DATAFLOW\n' + elif '// hls-fpga-machine-learning insert enqueue' in line: + io_type = model.config.get_config_value("IOType") + if io_type == 'io_parallel': # TODO: handle io_parallel + newline = '' + newline += indent + 'for(unsigned i = 0; i < N_IN; i++){\n' + if self.vitis_accelerator_ip_flow_config.get_interface() == 
'axi_stream': + newline += indent + indent + '#pragma HLS PIPELINE\n' + newline += indent + indent + 'tmp = in.read(); // Read input with cast\n' + newline += indent + indent + 'in_local[i] = tmp.data;\n' + newline += indent + indent + 'is_last = tmp.last;\n' + else: + newline += indent + indent + '#pragma HLS UNROLL\n' + newline += indent + indent + 'in_local[i] = in[i].data; // Read input with cast\n' + newline += indent + '}\n' + newline += indent + 'tmp.last = 0;\n' + elif io_type == 'io_stream': + newline = '' + newline += indent + 'my_pkt tmp;\n' + + newline += indent + 'for(unsigned i = 0; i < N_IN / {input_t}::size; ++i) {{\n' + # newline += indent + indent + '#pragma HLS PIPELINE\n' # TODO: check if needed + newline += indent + indent + '{input_t} ctype;\n' + # newline += indent + indent + '#pragma HLS DATA_PACK variable=ctype\n' + # newline += indent + indent + 'pragma HLS aggregate variable=ctype compact=auto' # TODO: check if needed + newline += indent + indent + 'for(unsigned j = 0; j < {input_t}::size; j++) {{\n' + # newline += indent + indent + indent + '#pragma HLS UNROLL\n' # TODO: check if needed + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + newline += indent + indent + indent + 'in.read(tmp);\n' + newline += indent + indent + indent + 'ctype[j] = tmp.data;\n' + newline += indent + indent + indent + 'is_last = tmp.last;\n' + else: # TODO: handle this case + newline += ( + indent + + indent + + indent + + 'ctype[j] = typename {input_t}::value_type(in[i * {input_t}::size + j].data);\n' + ) + newline += indent + indent + '}}\n' + newline += indent + indent + 'in_local.write(ctype);\n' + newline += indent + '}}\n' + newline += indent + 'tmp.last = 0;\n' + newline = newline.format(input_t=inp.type.name) + elif '// hls-fpga-machine-learning insert dequeue' in line: + io_type = model.config.get_config_value("IOType") + if io_type == 'io_parallel': # TODO: handle this case + newline = '' + newline += indent + 'for(unsigned 
i = 0; i < N_OUT; i++){\n' + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + newline += indent + indent + '#pragma HLS PIPELINE\n' + newline += indent + indent + 'tmp.data = out_local[i];\n' + newline += indent + indent + 'tmp.last = (is_last && (i == N_OUT - 1))? true : false;\n' + newline += indent + indent + 'out.write(tmp);\n' + else: + newline += indent + indent + '#pragma HLS UNROLL\n' + newline += indent + indent + 'out[i] = out_local[i]; // Write output with cast\n' + newline += indent + '}\n' + elif io_type == 'io_stream': + newline = '' + newline += indent + 'for(unsigned i = 0; i < N_OUT / {result_t}::size; ++i) {{\n' + # newline += indent + indent + '#pragma HLS PIPELINE\n' + newline += indent + indent + '{result_t} ctype = out_local.read();\n' + newline += indent + indent + 'for(unsigned j = 0; j < {result_t}::size; j++) {{\n' + # newline += indent + indent + indent + '#pragma HLS UNROLL\n' + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + newline += indent + indent + indent + f'tmp.data = ({inp_axi_t}) (ctype[j]);\n' + + newline += indent + indent + indent + 'if(is_last) {{tmp.last = (((i+1)*(j+1))==N_OUT);}}\n' + + newline += indent + indent + indent + 'out.write(tmp);\n' + else: + newline += indent + indent + indent + 'out[i * {result_t}::size + j] = output_axi_t(ctype[j]);\n' + newline += indent + indent + '}}\n' + newline += indent + '}}\n' + newline = newline.format(result_t=out.type.name) + else: + newline = line + fout.write(newline) + f.close() + fout.close() + + def modify_build_script(self, model): + ''' + Modify the build_prj.tcl and build_lib.sh scripts to add the extra wrapper files and set the top function + ''' + filedir = os.path.dirname(os.path.abspath(__file__)) + oldfile = f'{model.config.get_output_dir()}/build_prj.tcl' + newfile = f'{model.config.get_output_dir()}/build_prj_axi.tcl' + f = open(oldfile) + fout = open(newfile, 'w') + + for line in f.readlines(): + if 'set_top' 
in line: + newline = line[:-1] + '_axi\n' # remove the newline from the line end and append _axi for the new top + newline += f'add_files firmware/{model.config.get_project_name()}_axi.cpp -cflags "-std=c++0x"\n' + elif f'{model.config.get_project_name()}_cosim' in line: + newline = line.replace( + f'{model.config.get_project_name()}_cosim', + f'{model.config.get_project_name()}_axi_cosim', + ) + elif '${project_name}.tcl' in line: + newline = line.replace('${project_name}.tcl', '${project_name}_axi.tcl') + else: + newline = line + fout.write(newline) + + f.close() + fout.close() + os.rename(newfile, oldfile) + + ################### + # build_lib.sh + ################### + + f = open(os.path.join(filedir, '../templates/vitis_accelerator_ip_flow/build_lib.sh')) + fout = open(f'{model.config.get_output_dir()}/build_lib.sh', 'w') + + for line in f.readlines(): + line = line.replace('myproject', model.config.get_project_name()) + line = line.replace('mystamp', model.config.get_config_value('Stamp')) + + fout.write(line) + f.close() + fout.close() + + def write_wrapper_test(self, model): + ################### + # write myproject_test_wrapper.cpp + ################### + oldfile = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp' + newfile = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_test_wrapper.cpp' + + inp_axi_t, out_axi_t, inp, out = self.vitis_accelerator_ip_flow_config.get_corrected_types() + + f = open(oldfile) + fout = open(newfile, 'w') + + inp = model.get_input_variables()[0] + out = model.get_output_variables()[0] + io_type = model.config.get_config_value("IOType") + + for line in f.readlines(): + if f'{model.config.get_project_name()}.h' in line: + newline = line.replace(f'{model.config.get_project_name()}.h', f'{model.config.get_project_name()}_axi.h') + elif inp.definition_cpp() in line: + newline = line.replace( + inp.definition_cpp(), 'hls::stream< my_pkt > inputs' + ) # TODO instead of replacing 
strings, how about we use proper variables and their definition? + elif out.definition_cpp() in line: + newline = line.replace(out.definition_cpp(), 'hls::stream< my_pkt > outputs') + elif 'unsigned short' in line: + newline = '' + elif f'{model.config.get_project_name()}(' in line: + indent_amount = line.split(model.config.get_project_name())[0] + newline = indent_amount + f'{model.config.get_project_name()}_axi(inputs,outputs);\n' + elif inp.size_cpp() in line or inp.name in line or inp.type.name in line: + newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.name, 'inputs').replace(inp.type.name, 'my_pkt') + elif out.size_cpp() in line or out.name in line or out.type.name in line: + newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.name, 'outputs').replace(out.type.name, 'my_pkt') + else: + newline = line + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + if 'copy_data' in line: + newline = newline.replace('copy_data', 'copy_data_axi').replace("0,", "") + + if io_type == 'io_stream': + if 'nnet::fill_zero' in line: + newline = newline.replace("nnet::fill_zero<", f"nnet::fill_zero<{inp.type.name}, ") + # indent = line.split('n')[0] + # newline = indent + indent + 'inputs[N_IN-1].last = 1;\n' + if 'print_result' in line: + newline = newline.replace("print_result<", f"print_result<{out.type.name}, ") + fout.write(newline) + + f.close() + fout.close() + os.rename(newfile, oldfile) + + ################### + # write myproject_bridge_wrapper.cpp + ################### + oldfile = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_bridge.cpp' + newfile = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_bridge_wrapper.cpp' + + f = open(oldfile) + fout = open(newfile, 'w') + + inp = model.get_input_variables()[0] + out = model.get_output_variables()[0] + + for line in f.readlines(): + if f'{model.config.get_project_name()}.h' in line: + newline = 
line.replace(f'{model.config.get_project_name()}.h', f'{model.config.get_project_name()}_axi.h') + elif inp.definition_cpp(name_suffix='_ap') in line: + newline = line.replace(inp.definition_cpp(name_suffix='_ap'), f'hls::stream< my_pkt > {inp.name}_ap') + elif out.definition_cpp(name_suffix='_ap') in line: + newline = line.replace(out.definition_cpp(name_suffix='_ap'), f'hls::stream< my_pkt > {out.name}_ap') + elif f'{model.config.get_project_name()}(' in line: + indent_amount = line.split(model.config.get_project_name())[0] + newline = indent_amount + '{}_axi({}_ap,{}_ap);\n'.format( + model.config.get_project_name(), inp.name, out.name + ) + elif inp.size_cpp() in line or inp.name in line or inp.type.name in line: + newline = line.replace(inp.size_cpp(), 'N_IN').replace(inp.type.name, inp_axi_t) + elif out.size_cpp() in line or out.name in line or out.type.name in line: + newline = line.replace(out.size_cpp(), 'N_OUT').replace(out.type.name, out_axi_t) + else: + newline = line + fout.write(newline) + + f.close() + fout.close() + os.rename(newfile, oldfile) + + def write_board_script(self, model): + ''' + Write the tcl scripts and kernel sources to create a Vivado IPI project for the VitisAcceleratorIPFlow + ''' + filedir = os.path.dirname(os.path.abspath(__file__)) + copyfile( + os.path.join(filedir, self.vitis_accelerator_ip_flow_config.get_tcl_file_path()), + f'{model.config.get_output_dir()}/design.tcl', + ) + # Generic alveo board + if self.vitis_accelerator_ip_flow_config.get_board().startswith('alveo'): + src_dir = os.path.join(filedir, self.vitis_accelerator_ip_flow_config.get_krnl_rtl_src_dir()) + dst_dir = os.path.abspath(model.config.get_output_dir()) + '/src' + copy_tree(src_dir, dst_dir) + + ################### + # project.tcl + ################### + f = open(f'{model.config.get_output_dir()}/project.tcl', 'w') + f.write('variable project_name\n') + f.write(f'set project_name "{model.config.get_project_name()}"\n') + f.write('variable backend\n') + 
f.write('set backend "vitisacceleratoripflow"\n') + f.write('variable part\n') + f.write(f'set part "{self.vitis_accelerator_ip_flow_config.get_part()}"\n') + f.write('variable clock_period\n') + f.write('set clock_period {}\n'.format(model.config.get_config_value('ClockPeriod'))) + f.write('variable clock_uncertainty\n') + f.write('set clock_uncertainty {}\n'.format(model.config.get_config_value('ClockUncertainty', '12.5%'))) + f.write('variable version\n') + f.write('set version "{}"\n'.format(model.config.get_config_value('Version', '1.0.0'))) + if self.vitis_accelerator_ip_flow_config.get_interface() == 'axi_stream': + in_bit, out_bit = self.vitis_accelerator_ip_flow_config.get_io_bitwidth() + f.write(f'set bit_width_hls_output {in_bit}\n') + f.write(f'set bit_width_hls_input {out_bit}\n') + f.close() + + def write_driver(self, model): + filedir = os.path.dirname(os.path.abspath(__file__)) + copyfile( + os.path.join(filedir, self.vitis_accelerator_ip_flow_config.get_driver_path()), + ('{}/' + self.vitis_accelerator_ip_flow_config.get_driver_file()).format(model.config.get_output_dir()), + ) + + def write_new_tar(self, model): + # os.remove(model.config.get_output_dir() + '.tar.gz') + super().write_tar(model) + + def write_hls(self, model): + """ + Write the HLS project. Calls the VivadoBackend writer, and extra steps for VitisAcceleratorIPFlow/AXI interface + """ + # TODO temporarily move config import here to avoid cyclic dependency, until config is moved to its own package + from hls4ml.backends import VitisAcceleratorIPFlowConfig + + self.vitis_accelerator_ip_flow_config = VitisAcceleratorIPFlowConfig( + model.config, model.get_input_variables(), model.get_output_variables() + ) + super().write_hls(model) + self.write_board_script(model) + self.write_driver(model) + self.write_wrapper_test(model) + self.write_axi_wrapper(model) + self.modify_build_script(model) + self.write_new_tar(model)