forked from Xilinx/Vitis-Tutorials
Squashed 'utils' changes from 698eb8e..9fb2945 (#705)
9fb2945 remove HOST_ARCH from docs (Xilinx#279)
8dc4ec3 Merge pull request Xilinx#278 from FaaSApps/cr_1135044_1
d440a75 fix description.json for cuboid_read_hls and cuboid_write_hls
17c5fb0 Merge pull request Xilinx#277 from FaaSApps/cr_1135044
d50704a remove unsupported properties
6b83ed6 Merge pull request Xilinx#276 from FaaSApps/fix_4d_2
96e8661 fix cosim error caused by stack size
ac90d72 Merge pull request Xilinx#275 from FaaSApps/fix_4d
b227e8a fix 4d datamover
cc84882 Clean unused files from docs in next branch (Xilinx#274)
be607f4 fix bug in doc -- remove build dir and update index.rst (Xilinx#273)
6161a00 add mem (Xilinx#272)
d8fa6fa build rst and html
79230a9 fix doc (Xilinx#271)
5a49d7f Merge pull request Xilinx#270 from tuol/2022.2_features
6a534d9 add datamover L1 tests
017c789 update doc in next branch for portal (Xilinx#269)
596d99d Merge pull request Xilinx#268 from liyuanz/next
91996c3 update
3371605 Merge pull request Xilinx#266 from changg/22.1_mks
8fc7d8a fix u280 case
9710bb4 22.2 update mk
1b0c33d change 2022.1_stable_latest to 2022.2_stable_latest
2301ab6 Merge pull request Xilinx#263 from tuol/fix_tutorial
b1b1c22 remove fix platform in tutorial
03797e0 Merge pull request Xilinx#262 from tuol/fix_ttl
38e737d fix ttl
472feb6 Merge pull request Xilinx#260 from tuol/fix_conf_py
0bfb2be update version in conf.py

Co-authored-by: sdausr <[email protected]>
2 people authored and GitHub Enterprise committed on Sep 16, 2022
1 parent a646710, commit d031f49
Showing 55 changed files with 2,961 additions and 798 deletions.
@@ -1,3 +1,3 @@
 @Library('pipeline-library')_
 VitisLibPipeline (branch: 'next', libname: 'xf_utils_hw', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest', mail_on:'daily:PR')
+devtest: 'RunDeploy.sh', TOOLVERSION: '2022.2_stable_latest', mail_on:'daily:PR')
@@ -0,0 +1,341 @@
/*
 * Copyright 2022 Xilinx, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file pl_datamover.hpp
 * @brief This file provides functions to load data from an AXI master to an AXI stream and vice versa.
 *
 * This file is part of the Vitis Utility Library.
 */

#ifndef XF_UTILS_HW_PL_DATAMOVER_HPP
#define XF_UTILS_HW_PL_DATAMOVER_HPP

#include <ap_int.h>
#include <ap_axi_sdata.h>
#include <hls_stream.h>
#include <hls_burst_maxi.h>

namespace xf {
namespace common {
namespace utils_hw {
namespace details {

/**
 * A note on the manual burst read / write design.
 * Both functions take "request params + data" and operate on a burst_maxi port.
 * Both issue the request first and close it later, which means requests are in flight
 * and need buffering.
 *
 * One important limit to be aware of:
 * the burst_maxi port first splits a request into sub-requests if the data crosses a 4KB boundary.
 * The outstanding value in the burst_maxi port pragma is the upper bound on such sub-requests.
 * To avoid deadlock from issuing too many requests, the number of requests from the HLS side
 * must stay below that outstanding limit.
 *
 * In the actual design, the burst_maxi width is no wider than 64 bytes (512 bits).
 * If BURSTLEN <= 64, the total size of one request is at most 64 * 64 = 4096 bytes,
 * so each HLS request produces at most twice as many sub-requests.
 * Therefore keeping at most (OUTSTANDING / 2) HLS requests in flight is safe.
 *
 * BURSTLEN should not be too small either; for good bandwidth it should be at least 16.
 */
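
/*
 * A worked example (illustrative numbers only, not part of the original comment):
 * with WDATA = 512 (64 bytes per beat) and BURSTLEN = 64, one HLS request covers
 * at most 64 * 64 = 4096 bytes, so it can straddle at most one 4KB boundary and
 * the AXI adapter splits it into at most 2 sub-requests. If the port were
 * configured with, say, num_read_outstanding = 64 (a hypothetical setting), the
 * (OUTSTANDING / 2) = 32 bound used by the functions below keeps the number of
 * in-flight sub-requests within the adapter's limit.
 */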

/**
 * Read from stream and create burst write request to DDR/HBM
 *
 * @tparam WDATA width of MAXI port.
 * @tparam LATENCY latency of MAXI port.
 * @tparam OUTSTANDING write outstanding of MAXI port, should be less than 512.
 * @tparam BURSTLEN write burst length of MAXI port, should be less than 64.
 *
 * @param r_offset stream to get offset for burst write
 * @param r_burst stream to get length for burst write, should be no bigger than BURSTLEN
 * @param e_r end of write request
 * @param w_data stream to read data for writing
 * @param data MAXI port for writing
 */

template <int WDATA, int LATENCY, int OUTSTANDING, int BURSTLEN>
void manualBurstWrite( // input
    hls::stream<ap_uint<64> >& r_offset,
    hls::stream<ap_uint<10> >& r_burst,
    hls::stream<bool>& e_r,
    hls::stream<ap_axiu<WDATA, 0, 0, 0> >& w_data,
    // output
    hls::burst_maxi<ap_uint<WDATA> >& data) {
    ap_uint<LATENCY> check = 0;
    ap_uint<10> req_left = OUTSTANDING / 2;
    ap_uint<10> write_left = 0;
    ap_uint<10> burst_record[1024];
    ap_uint<10> rec_head = 0;
    ap_uint<10> rec_tail = 0;
    bool last = e_r.read();

ACC_BURST_REQ_LOOP:
    // TODO: accumulating this many requests up front might cause deadlock; to be fixed.
    while (!last && rec_tail != OUTSTANDING / 2) {
#pragma HLS pipeline II = 1
        ap_uint<64> tmp_offset = r_offset.read();
        ap_uint<10> tmp_burst = r_burst.read();
        last = e_r.read();
        burst_record[rec_tail++] = tmp_burst;
        data.write_request(tmp_offset, tmp_burst);
        req_left--;
    }
    if (rec_tail != 0) {
        write_left = burst_record[rec_head++];
    }

BURST_WRITE_LOOP:
    while (!last || write_left != 0 || rec_head != rec_tail) {
#pragma HLS pipeline II = 1
        bool check_l = check[LATENCY - 1];
        check <<= 1;

        if (write_left != 0) { // load data and write if possible
            ap_axiu<WDATA, 0, 0, 0> tmp_data = w_data.read();
            data.write(tmp_data.data); // forward one beat of payload to the MAXI port

            if (--write_left == 0) { // if all data of the current write request has been sent
                check[0] = 1;
                if (rec_head != rec_tail) {
                    write_left = burst_record[rec_head++];
                }
            }
        }

        if (check_l) { // if a write request has become old enough
            data.write_response();

            if (!last) {
                ap_uint<64> tmp_offset = r_offset.read();
                ap_uint<10> tmp_burst = r_burst.read();
                last = e_r.read();
                burst_record[rec_tail++] = tmp_burst;
                data.write_request(tmp_offset, tmp_burst);
            } else {
                req_left++;
            }
        }
    }

    while (check != 0) {
        bool check_l = check[LATENCY - 1];
        check <<= 1;
        if (check_l) {
            data.write_response();
            req_left--;
        }
    }
}

/**
 * Create burst read request to DDR/HBM and write to stream
 *
 * @tparam WDATA width of MAXI port.
 * @tparam LATENCY latency of MAXI port.
 * @tparam OUTSTANDING read outstanding of MAXI port, should be less than 512.
 * @tparam BURSTLEN read burst length of MAXI port, should be less than 64.
 *
 * @param data MAXI port for reading
 * @param r_offset stream to get offset for burst read
 * @param r_burst stream to get length for burst read, should be no bigger than BURSTLEN
 * @param e_r end of read request
 * @param w_data stream to write read result
 */

template <int WDATA, int LATENCY, int OUTSTANDING, int BURSTLEN>
void manualBurstRead( // input
    hls::burst_maxi<ap_uint<WDATA> >& data,
    hls::stream<ap_uint<64> >& r_offset,
    hls::stream<ap_uint<10> >& r_burst,
    hls::stream<bool>& e_r,
    // output
    hls::stream<ap_axiu<WDATA, 0, 0, 0> >& w_data) {
    ap_uint<LATENCY> check = 0;             // delay check
    ap_uint<10> req_left = OUTSTANDING / 2; // how many more requests could be issued, "/2" is to avoid hang
    ap_uint<10> req_ready = 0;              // how many requests should be ready, according to delay check
    ap_uint<10> burst_record[1024];         // burstlen record
    ap_uint<10> rec_head = 0;               // record head
    ap_uint<10> rec_tail = 0;               // record tail
    ap_uint<10> read_left = 0;              // reads left in one burst
    bool last = e_r.read();

BURST_READ_LOOP:
    while (!last || req_ready != 0 || check != 0) {
#pragma HLS pipeline II = 1

        bool check_l = check[LATENCY - 1];
        check <<= 1;

        if (req_left != 0 && !last) { // if read outstanding is not exhausted, issue more requests
            ap_uint<64> tmp_offset = r_offset.read();
            ap_uint<10> tmp_burst = r_burst.read();
            last = e_r.read();

            data.read_request(tmp_offset, tmp_burst);
            check[0] = 1;
            req_left--;
            burst_record[rec_tail++] = tmp_burst;
        }

        if (req_ready != 0 || read_left != 0) { // if there's a mature request
            if (read_left == 0) {
                read_left = burst_record[rec_head++] - 1;
                req_ready--;
                req_left++;
            } else {
                read_left--;
            }

            ap_axiu<WDATA, 0, 0, 0> tmp_data;
            tmp_data.data = data.read();
            tmp_data.keep = -1;
            tmp_data.last = 0;
            w_data.write(tmp_data);
        }

        if (check_l) { // if a request has become old enough
            req_ready++;
        }
    }

    while (read_left != 0) {
#pragma HLS pipeline II = 1
        ap_axiu<WDATA, 0, 0, 0> tmp_data;
        tmp_data.data = data.read();
        tmp_data.keep = -1;
        tmp_data.last = 0;
        read_left--;
        w_data.write(tmp_data);
    }
}
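
/*
 * Descriptor layout consumed by cmdParser below (inferred from the parsing code;
 * added here only as a reader's aid):
 *   word 0              : number of 4D commands in the buffer
 *   words 1 + 9*k .. 9+9*k : one command per 9 words, in order
 *                            offset, i1, d1, i2, d2, i3, d3, i4, d4
 * where offset is the base address of the data access (in WDATA-wide words), d1..d4
 * are the sizes of the four dimensions, and i1..i4 the corresponding strides.
 * When the innermost stride i1 == 1, accesses along dimension 1 are coalesced into
 * bursts of up to BURSTLEN words; otherwise every element becomes a single-beat request.
 */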
template <int BURSTLEN>
void cmdParser(hls::burst_maxi<ap_uint<64> > descriptor,
               hls::stream<ap_uint<64> >& r_offset,
               hls::stream<ap_uint<10> >& r_burst,
               hls::stream<bool>& e_r) {
    descriptor.read_request(0, 1);
    ap_uint<64> cmd_nums = descriptor.read();
    ap_uint<64> cmd_buf_ptr = 1;

    if (cmd_nums != 0) {
        descriptor.read_request(cmd_buf_ptr, 9);
    }

    for (int cmd_idx = 0; cmd_idx < cmd_nums; cmd_idx++) {
        ap_uint<64> cfg[9];
        for (int i = 0; i < 9; i++) {
            cfg[i] = descriptor.read();
        }
        cmd_buf_ptr += 9;
        if (cmd_idx + 1 < cmd_nums) { // prefetch the next command only if one exists, to avoid requesting past the descriptor buffer
            descriptor.read_request(cmd_buf_ptr, 9);
        }

        ap_uint<64>& offset = cfg[0];
        ap_uint<64>& i1 = cfg[1];
        ap_uint<64>& d1 = cfg[2];
        ap_uint<64>& i2 = cfg[3];
        ap_uint<64>& d2 = cfg[4];
        ap_uint<64>& i3 = cfg[5];
        ap_uint<64>& d3 = cfg[6];
        ap_uint<64>& i4 = cfg[7];
        ap_uint<64>& d4 = cfg[8];

        ap_uint<64> x_inc;
        if (i1 == 1) {
            x_inc = BURSTLEN;
        } else {
            x_inc = 1;
        }

        for (ap_uint<64> w = 0; w < d4; w++) {
            ap_uint<64> s4 = offset + w * i4;
            for (ap_uint<64> z = 0; z < d3; z++) {
                ap_uint<64> s3 = s4 + z * i3;
                for (ap_uint<64> y = 0; y < d2; y++) {
                    ap_uint<64> s2 = s3 + y * i2;
                    for (ap_uint<64> x = 0; x < d1; x += x_inc) {
#pragma HLS pipeline II = 1
                        ap_uint<64> s1 = s2 + x * i1;

                        ap_uint<10> burst;
                        if (i1 == 1) {
                            if ((x + BURSTLEN) <= d1) {
                                burst = BURSTLEN;
                            } else {
                                burst = d1 - x;
                            }
                        } else {
                            burst = 1;
                        }

                        r_offset.write(s1);
                        r_burst.write(burst);
                        e_r.write(false);
                    }
                }
            }
        }
    }
    e_r.write(true);
}
} // namespace details

template <int WDATA, int LATENCY, int OUTSTANDING, int BURSTLEN>
void read4D(
    // input
    hls::burst_maxi<ap_uint<64> >& descriptor_buffer,
    hls::burst_maxi<ap_uint<WDATA> >& data,
    // output
    hls::stream<ap_axiu<WDATA, 0, 0, 0> >& w_data) {
#pragma HLS dataflow
    hls::stream<ap_uint<64> > r_offset("r_offset");
#pragma HLS stream variable = r_offset depth = OUTSTANDING
    hls::stream<ap_uint<10> > r_burst("r_burst");
#pragma HLS stream variable = r_burst depth = OUTSTANDING
    hls::stream<bool> e_r("e_r");
#pragma HLS stream variable = e_r depth = OUTSTANDING

    details::cmdParser<BURSTLEN>(descriptor_buffer, r_offset, r_burst, e_r);
    details::manualBurstRead<WDATA, LATENCY, OUTSTANDING, BURSTLEN>(data, r_offset, r_burst, e_r, w_data);
}

template <int WDATA, int LATENCY, int OUTSTANDING, int BURSTLEN>
void write4D(
    // input
    hls::burst_maxi<ap_uint<64> >& descriptor_buffer,
    hls::stream<ap_axiu<WDATA, 0, 0, 0> >& w_data,
    // output
    hls::burst_maxi<ap_uint<WDATA> >& data) {
#pragma HLS dataflow
    hls::stream<ap_uint<64> > r_offset("r_offset");
#pragma HLS stream variable = r_offset depth = OUTSTANDING
    hls::stream<ap_uint<10> > r_burst("r_burst");
#pragma HLS stream variable = r_burst depth = OUTSTANDING
    hls::stream<bool> e_r("e_r");
#pragma HLS stream variable = e_r depth = OUTSTANDING

    details::cmdParser<BURSTLEN>(descriptor_buffer, r_offset, r_burst, e_r);
    details::manualBurstWrite<WDATA, LATENCY, OUTSTANDING, BURSTLEN>(r_offset, r_burst, e_r, w_data, data);
}

} // namespace utils_hw
} // namespace common
} // namespace xf

#endif
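
For reference, a minimal sketch of how read4D might be wrapped in a top-level Vitis HLS kernel. Only xf::common::utils_hw::read4D and its template parameters come from the header above; the kernel name, port bundle names, include path, and pragma values are illustrative assumptions, not part of this commit.

// Hypothetical top-level wrapper (sketch only; names and pragma values are assumptions).
#include <ap_int.h>
#include <ap_axi_sdata.h>
#include <hls_stream.h>
#include <hls_burst_maxi.h>
#include "pl_datamover.hpp" // adjust to the library's include layout

extern "C" void read4d_krnl(hls::burst_maxi<ap_uint<64> > descriptor, // 1 + 9*N command words
                            hls::burst_maxi<ap_uint<512> > data,      // DDR/HBM source buffer
                            hls::stream<ap_axiu<512, 0, 0, 0> >& out) {
#pragma HLS interface m_axi port = descriptor offset = slave bundle = gmem0
#pragma HLS interface m_axi port = data offset = slave bundle = gmem1 latency = 32 num_read_outstanding = 64 max_read_burst_length = 32
#pragma HLS interface axis port = out
#pragma HLS interface s_axilite port = return
    // Template parameters WDATA = 512, LATENCY = 32, OUTSTANDING = 64, BURSTLEN = 32
    // are example values chosen to match the m_axi pragma above.
    xf::common::utils_hw::read4D<512, 32, 64, 32>(descriptor, data, out);
}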