Skip to content

Commit

Permalink
Merge pull request #20 from dachang/v1.3
Browse files Browse the repository at this point in the history
V1.3
  • Loading branch information
Dachang Li authored and GitHub Enterprise committed Nov 25, 2020
2 parents 33d6cf9 + 28534bc commit 929bb36
Show file tree
Hide file tree
Showing 17 changed files with 182 additions and 106 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"C_Cpp.errorSquiggles": "Disabled",
"files.associations": {
"chrono": "cpp",
"*.tcc": "cpp"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace vitis{namespace ai{
auto image_from_device_start = std::chrono::system_clock::now();
auto image=v4l2_videoCapture();
LOG(INFO)<<"Decode and Resize :"<<std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - image_from_device_start).count();
std::chrono::system_clock::now() - image_from_device_start).count()<<"ms";
LOG_IF(INFO, ENV_PARAM(DEBUG_DEMO)) << "1080p decode queue size " << queue_->size();
while (!queue_->push(FrameInfo{channel_id_, ++frame_id_, image[1], image[0]},
std::chrono::milliseconds(500))) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace vitis {
}

int DpuThread::run() {
auto dpu_start = std::chrono::system_clock::now();
FrameInfo frame;
if (!queue_in_->pop(frame, std::chrono::milliseconds(500))) {
return 0;
Expand All @@ -31,7 +32,8 @@ namespace vitis {
}
LOG_IF(INFO, ENV_PARAM(DEBUG_DEMO))
<< "dpu queue size " << queue_out_->size();

LOG(INFO)<<"DPU :"<<std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - dpu_start).count()<<"ms";
while (!queue_out_->push(frame, std::chrono::milliseconds(500))) {
if (is_stopped()) {
return -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ OTHER DEALINGS IN THE SOFTWARE.

uint8_t *V4l2Capture::out_buf_0 = (uint8_t *)malloc(OUT_RESIZE_WIDTH * OUT_RESIZE_HEIGHT * 3);
uint8_t *V4l2Capture::out_buf_1 = (uint8_t *)malloc(OUT_WIDTH * OUT_HEIGHT * 3);
uint8_t *V4l2Capture::out_buf_back_0 = (uint8_t *)malloc(OUT_RESIZE_WIDTH * OUT_RESIZE_HEIGHT * 3);
uint8_t *V4l2Capture::out_buf_back_1 = (uint8_t *)malloc(OUT_WIDTH * OUT_HEIGHT * 3);
unsigned int V4l2Capture::resize_size = (OUT_RESIZE_WIDTH * OUT_RESIZE_HEIGHT * 3);
unsigned int V4l2Capture::full_size = (OUT_WIDTH * OUT_HEIGHT * 3);
bool V4l2Capture::xocl_initialized = false;
Expand Down Expand Up @@ -107,8 +105,18 @@ V4l2Capture::V4l2Capture(V4l2Device *device) : V4l2Access(device)
// -----------------------------------------
V4l2Capture::~V4l2Capture()
{
    // Release each output buffer exactly once. out_buf_0 and out_buf_1 are
    // static members of V4l2Capture, so the pointers are reset to NULL after
    // being freed: free(NULL) is a no-op, which keeps any later destructor
    // call from freeing the same allocation a second time.
    free(out_buf_0);
    out_buf_0 = NULL;

    free(out_buf_1);
    out_buf_1 = NULL;
}

// -----------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,9 @@ clWaitForEvents(1, (const cl_event *)&event_sp);
cl_ulong start = 0;
cl_ulong end = 0;
double diff_prof = 0.0f;
event_sp.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
event_sp.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
diff_prof = end - start;
// event_sp.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
// event_sp.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
// diff_prof = end - start;
std::cout << "Latency: " << (diff_prof / 1000000) << "ms" << std::endl;

q.enqueueReadBuffer(resizeFromDevice, CL_TRUE, 0, resize_size, out_buf_0);
Expand Down
109 changes: 41 additions & 68 deletions Machine_Learning/Introduction/03-Basic/Module_7/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# 3.7 Acceleration with HLS kernel
---

***Version: Vitis 2020.1+Vitis AI 1.2***
***Version: Vitis 2020.2+Vitis AI 1.3***

**_Note_**: You need to use the new image you generated in Section_3-Module_7. After the OS boots successfully, you can refer to the [quick_start](https://github.com/Xilinx/Vitis-AI/tree/master/Vitis-AI-Library#quick-start-for-edge) guide to learn how to prepare the development environment.
This example suite, for the Vitis AI Library and Vitis Accelerated Kernel, shows how to use the Vitis AI Library to run neural networks on DPUs and how to use the HLS kernel to speed up pre-/post-processing. For how to migrate from OpenCV to HLS, please refer to [app/README.md](app/README.md).
Expand Down Expand Up @@ -79,20 +79,23 @@ The directory structure and brief explanations as below:

## Pre-Requirement

- Hardware required:
- ZCU104 evaluation board
- Micro-USB cable, connected to the laptop for the terminal emulator.
- SD card to burn the OS image.
- Software required:
- Vitis 2020.1 [Vitis Core Development Kit](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vitis/2020-1.html) install in server.
- Vitis 2020.2 [Vitis Core Development Kit](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vitis/2020-2.html) install in server.
- [Silicon Labs quad CP210x USB-to-UART bridge driver](https://www.silabs.com/products/development-tools/software/usb-to-uart-bridge-vcp-drivers) install in laptop.
- Serial terminal emulator e.g. teraterm, Mobaxterm install in laptop
- [XRT 2020.1](https://github.com/Xilinx/XRT/tree/2020.1) install in laptop
- [XRT 2020.2](https://github.com/Xilinx/XRT/tree/2020.2) install in laptop
- [zcu104 base platform](https://github.com/Xilinx/Vitis_Embedded_Platform_Source/tree/master/Xilinx_Official_Platforms/zcu104_base) install in server
- [Vitis AI runtime package](https://www.xilinx.com/bin/public/openDownload?filename=vitis-ai-runtime-1.2.1.tar.gz) base on VAI1.2
- [Vitis AI runtime package](https://www.xilinx.com/bin/public/openDownload?filename=vitis-ai-runtime-1.3.tar.gz) base on VAI1.3
- [Vitis AI model packages ](https://github.com/Xilinx/Vitis-AI/tree/master/Vitis-AI-Library#quick-start-for-edge)for ZCU104
- [dpu_sw_optimize.tar.gz](https://github.com/Xilinx/Vitis-AI/blob/master/DPU-TRD/app/dpu_sw_optimize.tar.gz) Running zynqmp_dpu_optimize.sh to optimize the board setting
---
## Workflow Overview
- Step 1: Create Base Platform
Expand All @@ -111,107 +114,77 @@ Also note that the default PetaLinux configuration uses local scratchpad areas.
After cloning the platform source, and with both Vivado and PetaLinux set up, run ***make*** from the top-level platform directory. In this case we choose ZCU104_Base platform.


### Installing the Yocto SDK ###

A bundled Yocto SDK "sysroot" is not available with this package by default. To build non-trivial Linux software for this platform, the sysroot needs to be built and installed. This can be done with the command "make peta_sysroot". It is installed to "platform_repo/sysroot" once the build completes.

To cross-compile against this platform from the command line, source the environment-setup-aarch64-xilinx-linux script to set up your environment (cross compiler, build tools, libraries, etc).
### Step 2: DPU and HLS kernel integration

### Build instructions
- Clone the repository of vitis-ai-staging and vitis_library

This package comes with sources to generate the hardware specification file (XSA) from Vivado, PetaLinux sources to generate the image.ub, and platform sources to generate the Vitis platform.
```
$ git clone -b ML-2605-folder-structure [email protected]:Vitis/vitis-ai-staging.git
$ cd vitis-ai-staging/dsa/DPU-TRD/prj/Vitis/
$ git clone https://github.com/Xilinx/Vitis_Libraries.git
Build platform from scratch: make all
Build a platform without modifying hardware:
```
make petalinux_proj XSA_DIR= make pfm XSA_DIR=
- download the [mpsoc common system](https://www.xilinx.com/member/forms/download/xef.html?filename=xilinx-zynqmp-common-v2020.1.tar.gz), and unzip the files.

```
$ tar -xvzf xilinx-zynqmp-common-v2020.1.tar.gz
$ cd xilinx-zynqmp-common-v2020.1
$ gzip -d rootfs.ext4.gz
```
example:
make petalinux_proj XSA_DIR=/home/user/zcu104_dpu/vivado
make pfm XSA_DIR=/home/user/zcu104_dpu/vivado
```
---

### Step 2: DPU and HLS kernel integration

- Use below commands to clone the Vitis-AI from github project.
- copy the kernel source code and config files below to DPU-TRD work directory

```
$ git clone [email protected]:Xilinx/Vitis-AI.git
$ cp ${Module_7}/kernel/build/Makefile vitis-ai-staging/dsa/DPU-TRD/prj/Vitis
$ cp ${Module_7}/kernel/build/preprocessor_config.ini vitis-ai-staging/dsa/DPU-TRD/prj/Vitis
$ cp ${Module_7}/kernel/build/dpu_conf.vh vitis-ai-staging/dsa/DPU-TRD/prj/Vitis
$ cp ${Module_7}/kernel/src/pre_processor.cpp vitis-ai-staging/dsa/DPU-TRD/prj/Vitis
$ cp ${Module_7}/kernel/src/pre_processor.h vitis-ai-staging/dsa/DPU-TRD/prj/Vitis
```

- copy the files below to work directory
- Step4: Run the below commands to start IP integration and wait for it to complete.

```
$ cp ${Module_7}/kernel/build/* ${Vitis-AI/DPU-TRD}DPU-TRD/prj/Vitis
$ cp ${Module_7}/kernel/src/* ${Vitis-AI/DPU-TRD}DPU-TRD/prj/Vitis
$ cp ${Module_7}/kernel/src/* ${Vitis-AI/DPU-TRD}DPU-TRD/prj/Vitis/config_file
export SDX_PLATFORM=<path-to-platform>
export EDGE_COMMON_SW=<path-to-edge_common_sw>
make KERNEL=DPU DEVICE=ZCU104
```
- Step3: Git clone the vitis library
There are two methods to set up the target environment
1. Replace the BOOT.BIN and the dpu.xclbin on target.
```
cd ${Vitis-AI/DPU-TRD}DPU-TRD/prj/Vitis
git clone https://github.com/Xilinx/Vitis_Libraries.git
```
- Step4: Run the below commands to start IP integration and wait for it to complete.
- Step5: Export the platform path
$ scp vitis-ai-staging/dsa/DPU-TRD/prj/Vitis/binary_container_1/BOOT.BIN root@<zcu104 board ip>:/mnt/sd-mmcblk0p1/
$ scp vitis-ai-staging/dsa/DPU-TRD/prj/Vitis/binary_container_1/dpu.xclbin root@<zcu104 board ip>:/mnt/sd-mmcblk0p1/
```
$ cd ${Module_7}/kernel/src/* ${Vitis-AI/DPU-TRD}DPU-TRD/prj/Vitis
make -j
```


- Step5: Use etcher or other tools to burn the image to 16GB sd card.
- Step6: Set the Mode to SD card

```
Rev 1.0: SW6[4:1] - off, off, off, on
Rev D2: SW6[4:1] - on, off, on, off
```

2. Burn the SD card with the image generated in ***vitis-ai-staging/dsa/DPU-TRD/prj/Vitis/binary_container_1/sd_card.img***

## Step 3: Cross Compiler Environment Setup and Build Application
---
- Install the SDK package.
- Install the SDK package and the Vitis AI cross compile package

```
$sh sdk.sh
$sh sdk-2020.2.0.0.sh
$tar -xvzf vitis_ai_2020.2-r1.3.0.tar.gz -C <path-to-sysroot>/
```

- After installing the SDK package, set up the cross-compile environment and build the application by using the following commands.
```
$ source <full_path_of_sdk>/environment-setup-aarch64-xilinx-linux
$ cd ${Section_3-Basic}/Module_7/
$ sh build_app.sh
$ sh build_app_hls.sh
```

### Step 4:Board development setting.

For details on how to set up the board environment, refer to [Module_3/README.md](../../Module_3/README.md)

Copy the executable file and the libraries to board.
- Copy the test yuv file to target.

```
$ scp ${Section_3-Basic}/Module_7/build/usb_input_multi_threads_refinedet_drm root@$[IP_OF_BOARD]:/home/root
$ scp ${Section_3-Basic}/Module_7/build/myVHLS_V4l2s/libhls_v4l2s.so root@$[IP_OF_BOARD]:/usr/lib
$ scp test.yuv root@<zcu104 ip>:/home/root
```
- Download the board [optimized package](https://github.com/Xilinx/Vitis-AI/blob/master/DPU-TRD/app/dpu_sw_optimize.tar.gz) and run the script to complete the environment set up.

Running zynqmp_dpu_optimize.sh on board to optimize the board setting.
```
# scp dpu_sw_optimize.tar.gz root@$[Board_of_IP]:/home/root
# tar -xvzf dpu_sw_optimize.tar.gz
# cd ~/dpu_sw_optimize/zynqmp/
# sh zynqmp_dpu_optimize.sh
```

### Step 5: Run the Application.
### Step 4: Run the Application on Target

- Test the HLS kernel to see if it works properly with the test example. When the test example works, you will notice that two PNG files are generated in your execution directory.
- Test the HLS kernel to see if it works properly with the test example. When the test example works, you will notice that two PNG files are generated in your execution directory.
```
# chmod +x test_hls_kernel
# ./test_hls_kernel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ std::vector<cv::Mat> DecodeThread::v4l2Mats(2);
auto image_from_device_start = std::chrono::system_clock::now();
v4l2_videoCapture(v4l2Mats);
while (!queue_->push(FrameInfo{channel_id_, ++frame_id_, v4l2Mats[1], v4l2Mats[0]},
std::chrono::milliseconds(500))) {
std::chrono::milliseconds(10000))) {
if (is_stopped()) {
return -1;
}}


LOG(INFO)<<"Decode and Resize :"<<std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - image_from_device_start).count()/1000;
std::chrono::system_clock::now() - image_from_device_start).count()/1000<<"ms";
return 0;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ namespace vitis {
}

int DpuThread::run() {
auto dpu_start = std::chrono::system_clock::now();
FrameInfo frame;
if (!queue_in_->pop(frame, std::chrono::milliseconds(500))) {
return 0;
Expand All @@ -52,6 +53,8 @@ namespace vitis {
return -1;
}
}
LOG(INFO)<<"DPU in single thread duration :"<<std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - dpu_start).count()/1000<<"ms";
return 0;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ void GuiThread::clean_up_queue() {
}
}
int GuiThread::run() {
auto gui_start = std::chrono::system_clock::now();
FrameInfo frame_info;
if (!queue_->pop(frame_info, std::chrono::milliseconds(500))) {
inactive_counter_++;
Expand Down Expand Up @@ -129,6 +130,9 @@ int GuiThread::run() {
}
}
#endif
LOG(INFO)<<"Gui duration :"<<std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - gui_start).count()/1000<<"ms";

GuiThread::clean_up_queue();
return 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ namespace vitis{
long duration = 0;
if (!points_.empty()) {
auto end = points_.back();
duration =
duration =
std::chrono::duration_cast<std::chrono::milliseconds>(now - end)
.count();
LOG(INFO)<<"Sort thread duration : "<<duration<< " ms";
float duration2 = (float)duration;
float total = (float)points_.size();
fps = total / duration2 * 1000.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,35 @@
#

#!/bin/bash
set -e
BOARD_IP=10.176.18.178

git pull
rm -rf build
mkdir build&&cd build
cmake -DUSE_DRM=on -DUSE_KERNEL=on .. && make
cp *drm ../
build_command(){
    # Configure with the USE_DRM option on, then build. The two steps run as
    # separate commands so that, with `set -e` active, a cmake failure stops
    # the script right away; in an `a && b` list a failing `a` is exempt from
    # `set -e` and the script would carry on to the copy step.
    cmake -DUSE_DRM=on ..
    make
    # Copy the files matching *drm up to the parent directory.
    cp *drm ../
}

clean_workspace(){
    # Drop any existing build directory, recreate it, and make it the
    # current working directory.
    [ ! -d build ] || rm -rf build
    mkdir -pv build
    cd build
}
copy_file()
{
    # Copy the build outputs to the board at $BOARD_IP over scp.
    # Destinations are quoted so the expansion of BOARD_IP is never subject
    # to word splitting or globbing; the source globs stay unquoted on purpose.
    scp usb_input* "root@${BOARD_IP}:/home/root/"
    scp HLS_*/lib* "root@${BOARD_IP}:/usr/lib"
    scp HLS_*/test_hls_kernel "root@${BOARD_IP}:/home/root/"
}

main(){
    # Run the whole flow in order: recreate the build directory, update the
    # sources, build, then copy the results to the board.
    clean_workspace
    git pull
    build_command
    copy_file
}

# Forward the script's command-line arguments to main. The previous `@$`
# passed the literal string "@$" instead of the arguments.
main "$@"
Loading

0 comments on commit 929bb36

Please sign in to comment.