[TOOL][NATIVE] Android native application for deploy and run

This application serves as a reference for verifying and integrating TVM compiled models on Android targets natively, independent of RPC setup. tvmc will be used for compiling, tuning and running the model before deployment. This PR also covers * Enabling clml for tvmc compilation tool. * Graph runtime api "get_output_info" to return output tensor specification similar to "get_input_info" * This tool adds and enables 3rdparty dependency "cnpy" to deal with npz files.
apache · Jan 16, 2023 · 3a770c2 · 3a770c2
1 parent 21d7968
commit 3a770c2
Show file tree

Hide file tree

Showing 19 changed files with 1,074 additions and 3 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -19,3 +19,6 @@
 [submodule "3rdparty/OpenCL-Headers"]
 	path = 3rdparty/OpenCL-Headers
 	url = https://github.com/KhronosGroup/OpenCL-Headers.git
+[submodule "3rdparty/cnpy"]
+	path = 3rdparty/cnpy
+	url = https://github.com/rogersce/cnpy.git
diff --git a/3rdparty/cnpy b/3rdparty/cnpy
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -591,6 +591,10 @@ if(USE_CPP_RPC)
   add_subdirectory("apps/cpp_rpc")
 endif()
 
+if(USE_CPP_RTVM)
+  add_subdirectory("apps/cpp_rtvm")
+endif()
+
 if(USE_IOS_RPC)
   add_subdirectory("apps/ios_rpc")
 endif()

diff --git a/LICENSE b/LICENSE
@@ -233,6 +233,7 @@ MIT License
 3rdparty/libcrc
 3rdparty/cma
 3rdparty/compiler-rt/builtin_fp16.h
+3rdparty/cnpy
 
 
 The Unlicense

diff --git a/apps/cpp_rtvm/CMakeLists.txt b/apps/cpp_rtvm/CMakeLists.txt
@@ -0,0 +1,79 @@
+# Opt in to the NEW behaviour of policy CMP0069 so that CMake does not warn
+# about interprocedural optimization (IPO) handling.
+cmake_policy(SET CMP0069 NEW)
+
+# Translation units that make up the rtvm tool; cnpy is compiled directly
+# from the 3rdparty tree.
+set(RTVM_SOURCES main.cc tvm_runner.cc ../../3rdparty/cnpy/cnpy.cpp)
+if(WIN32)
+  list(APPEND RTVM_SOURCES win32_process.cc)
+endif()
+
+# Libraries to link into rtvm; populated by the feature checks further down.
+set(RTVM_LINKER_LIBS "")
+
+# Emit the executable into the top-level build directory, next to the other
+# TVM libraries.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+add_executable(rtvm ${RTVM_SOURCES})
+
+# Turn on interprocedural optimization for Release builds of rtvm, but only
+# when the active toolchain reports that it supports IPO.
+include(CheckIPOSupported)
+check_ipo_supported(RESULT rtvm_ipo_supported OUTPUT rtvm_ipo_output)
+if(rtvm_ipo_supported)
+  set_target_properties(rtvm PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE)
+endif()
+
+# Define NOMINMAX when compiling rtvm on Windows.
+# (CMake strips a leading -D from definitions, so the bare name is equivalent.)
+if(WIN32)
+  target_compile_definitions(rtvm PUBLIC NOMINMAX)
+endif()
+
+# Extra linker flags for rtvm.
+#
+# Both branches below append to LINK_FLAGS (APPEND_STRING) instead of
+# overwriting it, so that the gold-linker flag does not silently discard
+# -lpthread when both conditions hold.
+
+# Link against pthread when the build is configured with OS set to "Linux".
+# NOTE(review): OS is not a CMake built-in variable; presumably it is provided
+# by the invoking build configuration - confirm.
+if(OS)
+  if(OS STREQUAL "Linux")
+    set_property(TARGET rtvm APPEND_STRING PROPERTY LINK_FLAGS " -lpthread")
+  endif()
+endif()
+
+# For OpenCL builds targeting Android with an NDK older than major version 23
+# (as reported by the ANDROID_NDK_MAJOR environment variable), select the gold
+# linker.
+if(USE_OPENCL)
+  if(ANDROID_ABI)
+    if(DEFINED ENV{ANDROID_NDK_MAJOR})
+      # Quoted so that a defined-but-empty variable does not produce a
+      # malformed if() expression.
+      if("$ENV{ANDROID_NDK_MAJOR}" VERSION_LESS "23")
+        set_property(TARGET rtvm APPEND_STRING PROPERTY LINK_FLAGS " -fuse-ld=gold")
+      endif()
+    endif()
+  endif()
+endif()
+
+# Header search paths for rtvm: the TVM public headers, the bundled cnpy
+# sources, and the DLPack / DMLC include directories.
+#
+# DLPACK_PATH and DMLC_PATH are expanded as variables; passing the bare names
+# would add literal directories called "DLPACK_PATH" / "DMLC_PATH" relative to
+# this folder. They are left unquoted so that an unset or empty value simply
+# contributes no entry.
+# NOTE(review): relative values are resolved against this directory by
+# target_include_directories - confirm how the top-level build defines them.
+target_include_directories(
+  rtvm
+  PUBLIC "../../include"
+  PUBLIC "../../3rdparty/cnpy"
+  PUBLIC ${DLPACK_PATH}
+  PUBLIC ${DMLC_PATH}
+)
+
+# Hexagon support on Android: look up the DSP RPC library locations in the
+# Hexagon SDK and link rtvm against cdsprpc and the Android log library.
+if(BUILD_FOR_ANDROID AND USE_HEXAGON)
+  get_hexagon_sdk_property("${USE_HEXAGON_SDK}" "${USE_HEXAGON_ARCH}"
+    DSPRPC_LIB DSPRPC_LIB_DIRS
+  )
+  if(DSPRPC_LIB_DIRS)
+    # The rtvm target already exists at this point, and link_directories()
+    # only affects targets created after it is called. Attach the search
+    # path to the target directly instead (requires CMake >= 3.13).
+    target_link_directories(rtvm PRIVATE ${DSPRPC_LIB_DIRS})
+  else()
+    message(WARNING "Could not locate some Hexagon SDK components")
+  endif()
+  list(APPEND RTVM_LINKER_LIBS cdsprpc log)
+endif()
+
+# Ethos-N support: link the runtime library when its location is known,
+# otherwise emit a configure-time warning and continue.
+if(USE_ETHOSN)
+  if(NOT ETHOSN_RUNTIME_LIBRARY)
+    message(WARNING "Could not locate Arm(R) Ethos(TM)-N runtime library components")
+  else()
+    list(APPEND RTVM_LINKER_LIBS ${ETHOSN_RUNTIME_LIBRARY})
+  endif()
+endif()
+
+# Link the TVM runtime and zlib. For a static runtime the archive is wrapped
+# in --whole-archive / --no-whole-archive linker flags.
+if(NOT BUILD_STATIC_RUNTIME)
+  list(APPEND RTVM_LINKER_LIBS tvm_runtime z)
+else()
+  list(APPEND RTVM_LINKER_LIBS
+    -Wl,--whole-archive
+    tvm_runtime
+    -Wl,--no-whole-archive
+    z
+  )
+endif()
+
+# Apply everything collected in RTVM_LINKER_LIBS to the executable.
+target_link_libraries(rtvm ${RTVM_LINKER_LIBS})
diff --git a/apps/cpp_rtvm/README.md b/apps/cpp_rtvm/README.md
@@ -0,0 +1,220 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+
+# Native Inference application for CPP Native
+
+Native inference tool ```rtvm``` helps in deploying TVM compiled models from a standalone cpp environment.
+Overall process starts from getting a model from a framework all the way up to running on target device using ```rtvm``` tool.
+
+### Models
+
+Models can be downloaded from well known frameworks like Tensorflow, PyTorch, TFLite, Onnx, etc.
+scripts/download_models.py can be used to download various well known models from different frameworks.
+It will dump various models under model_data in current directory.
+
+```bash
+python3  scripts/download_models.py
+```
+
+### Auto Tuning
+Auto tuning process tunes various operators of the given model for the respective target. Auto tuning for remote devices uses ```tvm_rpc``` and we need to set up the RPC environment before we invoke tuning.
+Please refer below section RPC setup for the same.
+
+Auto tuning is necessary to obtain the best performing kernels. We can skip this step if we already have a tuning log or if the tuning cache is available from tophub (used implicitly by the TVM compilation process).
+The message below indicates that some kernels are not optimized for the selected target. In this case we can proceed with tuning for best performance.
+```One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.```
+
+with
+
+``` bash
+tvm tracker running on ```TVM_TRACKER_HOST```
+tracker port being ```TVM_TRACKER_PORT```
+rpc device access key being ```TVM_RPC_KEY```
+the model to be tuned being ```./model_data/keras-resnet50/resnet50.h5```
+```
+
+the below command we can generate the tuning cache to file ```./model_data/keras-resnet50/keras-resnet50.log```
+
+```bash
+python3 -m tvm.driver.tvmc tune --target="opencl" --target-host="llvm -mtriple=aarch64-linux-gnu" \
+./model_data/keras-resnet50/resnet50.h5 -o ./model_data/keras-resnet50/keras-resnet50.log \
+--early-stopping 0 --repeat 30 --rpc-key android --rpc-tracker 127.0.0.1:9120 --trials 1024 \
+--tuning-records ./model_data/keras-resnet50/keras-resnet50-records.log --tuner xgb
+```
+
+where
+```bash
+--target="opencl -device=adreno" refers to opencl device on Android device
+--target-host="llvm -mtriple=aarch64-linux-gnu" refers to target_host being an ARM64 CPU
+Options --early-stopping, --repeat, --trials, --tuner are Auto TVM specific options. Please refer to AutoTVM documentation for more details here.
+```
+
+### Compile the model
+
+Compilation step generates TVM compiler output artifacts which need to be taken to target device for deployment.
+These artifacts are packed as a compressed archive containing the kernel shared lib, a json with the graph description and the params binary.
+
+Below command will generate the same
+
+
+```bash
+python3 -m tvm.driver.tvmc compile --cross-compiler ${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang \
+--target="opencl, llvm" --target-llvm-mtriple aarch64-linux-gnu -o keras-resnet50.tar ./model_data/keras-resnet50/resnet50.h5
+```
+
+where
+```
+--cross-compiler : Indicates the cross compiler path for kernel library generation
+--target="opencl, llvm" indicates target and host devices
+--
+```
+
+### Test Run via RPC
+
+At this stage we can verify the generated compiler output for execution correctness over the RPC setup interface.
+Below command can run the compiled output on remote target device.
+
+with
+
+``` bash
+tvm tracker running on ```TVM_TRACKER_HOST```
+tracker port being ```TVM_TRACKER_PORT```
+rpc device access key being ```TVM_RPC_KEY```
+compilation out being keras-resnet50.tar
+```
+
+```bash
+python3 -m tvm.driver.tvmc run --device="cl" keras-resnet50.tar --rpc-key android --rpc-tracker 127.0.0.1:9120 --print-time
+```
+
+This feeds random inputs and validates the execution correctness of the compiled model.
+
+```tvmc``` tool has various options to input custom data, profile, benchmark the execution.
+
+
+### Deploy
+
+The tar archive generated can be used with ```rtvm``` application in Android native to run the same using tvm_runtime.
+
+
+# RPC Setup
+
+RPC setup for Android devices requires cross compilation of tvm_rpc (and of libtvm_runtime.so, which is a dependency) for the remote device.
+RPC setup involves running the tracker on the host device and running tvm_rpc on the target device.
+
+### Tracker
+
+below command runs the tracker on host over port ```9100```
+
+```bash
+python3 -m tvm.exec.rpc_tracker --host 127.0.0.1 --port 9100
+```
+### RPC on Target
+
+With ```abcd1234ef``` being adb device id and tvm_rpc (and libtvm_runtime.so) is pushed to target device at ```/data/local/tmp/tvm_rpc/```
+
+```bash
+export ANDROID_SERIAL=abcd1234ef
+# Below settings will reroute networking tcp connections on devices to host device via adb interface
+adb reverse tcp:9100 tcp:9100
+adb forward tcp:5000 tcp:5000
+# Run the tvm_rpc on device
+env adb shell "cd /data/local/tmp/tvm_rpc; killall -9 tvm_rpc; \
+LD_LIBRARY_PATH=/data/local/tmp/tvm_rpc/ ./tvm_rpc server --host=0.0.0.0 --port=5000 --port-end=5010 --tracker=127.0.0.1:9100 --key=android"
+```
+
+Now we have the rpc setup with ```--rpc-tracker=127.0.0.1:9100``` and ```--rpc-key=android```.
+
+
+# Target Specific Configuration
+
+Below sections describe device/target specific settings to be used with tvmc tool
+
+### Adreno GPU
+
+Adreno GPU has a docker defined that helps to ease the development environment.
+
+Below command builds host and target rpc components for Adreno and drops into an interactive shell.
+
+```bash
+./tests/scripts/ci.py adreno -i
+```
+
+Also, one can build with Adreno OpenCLML SDK support
+
+```bash
+export ADRENO_OPENCL=<Path to OpenCLML SDK>
+./tests/scripts/ci.py adreno -i
+```
+
+Above command produces
+```build-adreno``` which is host build
+```build-adreno-target``` which contains cross compiled tvm_rpc and libtvm_runtime.so
+
+
+Below options to be used for Adreno GPU while working with tvmc
+
+* Tuning
+
+```
+--target="opencl -device=adreno"
+--target-host="llvm -mtriple=aarch64-linux-gnu"
+```
+
+* Compilation
+
+```
+--cross-compiler ${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang
+--target="opencl, llvm"
+--target-opencl-device adreno
+--target-llvm-mtriple aarch64-linux-gnu
+```
+
+While enabling CLML just need to specify below target option for compilation.
+```--target="opencl, clml, llvm"```
+
+
+* Running
+
+```--device="cl"```
+
+
+For example with a model from keras ```./model_data/keras-resnet50/resnet50.h5```
+
+
+```bash
+# Tuning
+python3 -m tvm.driver.tvmc tune --desired-layout NCHW --target="opencl -device=adreno" --target-host="llvm -mtriple=aarch64-linux-gnu" \
+./model_data/keras-resnet50/resnet50.h5 -o ./model_data/keras-resnet50/keras-resnet50.log --early-stopping 0 --repeat 30 \
+--rpc-key android --rpc-tracker 127.0.0.1:9120 --trials 1024 --tuning-records ./model_data/keras-resnet50/keras-resnet50-records.log --tuner xgb
+
+# Tuning produces tuning log ./model_data/keras-resnet50/keras-resnet50.log
+
+
+# Compilation
+python3 -m tvm.driver.tvmc compile --cross-compiler ${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang \
+--desired-layout NCHW --target="opencl, llvm" --target-opencl-device adreno --target-llvm-mtriple aarch64-linux-gnu \
+./model_data/keras-resnet50/resnet50.h5 -o keras-resnet50.tar
+
+# Compilation produces target artifacts keras-resnet50.tar
+
+# Run on adreno device via RPC
+# Assuming tracker is running on 127.0.0.1:9120 and target key is "android"
+python3 -m tvm.driver.tvmc run --device="cl" keras-resnet50.tar --rpc-key android --rpc-tracker 127.0.0.1:9120 --print-time
+
+```