Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Runtime] Enable option to use OpenMP thread pool #4089

Merged
merged 10 commits into from
Oct 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" O
tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON)
tvm_option(USE_GRAPH_RUNTIME_DEBUG "Build with tiny graph runtime debug mode" OFF)
tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
tvm_option(USE_SGX "Build with SGX" OFF)
tvm_option(USE_RTTI "Build with RTTI" ON)
Expand Down Expand Up @@ -154,6 +155,7 @@ list(APPEND COMPILER_SRCS ${RELAY_BACKEND_SRCS})
list(APPEND COMPILER_SRCS ${RELAY_IR_SRCS})
list(APPEND COMPILER_SRCS ${RELAY_QNN_SRCS})


if(USE_VM_PROFILER)
message(STATUS "Build compiler with Relay VM profiler support...")
file(GLOB BACKEND_VM_PROFILER_SRCS src/relay/backend/vm/profiler/*.cc)
Expand Down Expand Up @@ -233,6 +235,7 @@ include(cmake/modules/VTA.cmake)
include(cmake/modules/CUDA.cmake)
include(cmake/modules/OpenCL.cmake)
include(cmake/modules/OpenGL.cmake)
include(cmake/modules/OpenMP.cmake)
include(cmake/modules/Vulkan.cmake)
include(cmake/modules/Metal.cmake)
include(cmake/modules/ROCM.cmake)
Expand Down Expand Up @@ -264,6 +267,7 @@ add_library(tvm SHARED ${COMPILER_SRCS} ${RUNTIME_SRCS})
add_library(tvm_topi SHARED ${TOPI_SRCS})
add_library(tvm_runtime SHARED ${RUNTIME_SRCS})


if(USE_RELAY_DEBUG)
message(STATUS "Building Relay in debug mode...")
set_target_properties(tvm PROPERTIES COMPILE_DEFINITIONS "USE_RELAY_DEBUG")
Expand Down
4 changes: 4 additions & 0 deletions cmake/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ set(USE_BLAS none)
# set(USE_MKL_PATH <path to venv or site-packages directory>) if using `pip install mkl`
set(USE_MKL_PATH none)

# Whether to use the OpenMP thread pool; choices: none, gnu, intel
# Note: "gnu" uses the gomp library, "intel" uses the iomp5 library
set(USE_OPENMP none)

# Whether to use contrib.random in runtime
set(USE_RANDOM OFF)

Expand Down
48 changes: 48 additions & 0 deletions cmake/modules/OpenMP.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# OpenMP Module
#
# Selects the thread pool implementation for the runtime based on USE_OPENMP:
#   "gnu"   - link against the OpenMP libraries reported by find_package(OpenMP)
#   "intel" - link against Intel's iomp5 (libiomp5md when building with MSVC)
#   other   - keep the built-in TVM thread pool
#
# Every path defines TVM_THREADPOOL_USE_OPENMP (1 or 0) so the sources can
# test it with a plain `#if`.
if(USE_OPENMP STREQUAL "gnu")
  find_package(OpenMP)
  if(OPENMP_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenMP_CXX_LIBRARIES})
    add_definitions(-DTVM_THREADPOOL_USE_OPENMP=1)
    message(STATUS "Build with OpenMP ${OpenMP_CXX_LIBRARIES}")
  else()
    add_definitions(-DTVM_THREADPOOL_USE_OPENMP=0)
    message(WARNING "OpenMP cannot be found, use TVM threadpool instead.")
  endif()
elseif(USE_OPENMP STREQUAL "intel")
  find_package(OpenMP)
  if(OPENMP_FOUND)
    # The Intel runtime library has a different name on Windows.
    if(MSVC)
      find_library(OMP_LIBRARY NAMES libiomp5md)
    else()
      find_library(OMP_LIBRARY NAMES iomp5)
    endif()
  endif()
  # find_library() leaves OMP_LIBRARY set to "OMP_LIBRARY-NOTFOUND" on failure,
  # which evaluates to false here. Only enable the OpenMP thread pool when the
  # library was actually located; otherwise the NOTFOUND placeholder would be
  # appended to the linker libraries and break the build.
  if(OPENMP_FOUND AND OMP_LIBRARY)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    list(APPEND TVM_RUNTIME_LINKER_LIBS ${OMP_LIBRARY})
    add_definitions(-DTVM_THREADPOOL_USE_OPENMP=1)
    message(STATUS "Build with OpenMP ${OMP_LIBRARY}")
  elseif(OPENMP_FOUND)
    add_definitions(-DTVM_THREADPOOL_USE_OPENMP=0)
    message(WARNING "Intel OpenMP library (iomp5) cannot be found, use TVM threadpool instead.")
  else()
    add_definitions(-DTVM_THREADPOOL_USE_OPENMP=0)
    message(WARNING "OpenMP cannot be found, use TVM threadpool instead.")
  endif()
else()
  add_definitions(-DTVM_THREADPOOL_USE_OPENMP=0)
endif()
26 changes: 26 additions & 0 deletions src/runtime/thread_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
#include <tvm/runtime/threading_backend.h>
#include <dmlc/thread_local.h>
#include <dmlc/logging.h>
#if TVM_THREADPOOL_USE_OPENMP
#include <omp.h>
#endif
#include <thread>
#include <condition_variable>
#include <mutex>
Expand Down Expand Up @@ -394,12 +397,34 @@ int TVMBackendParallelLaunch(
FTVMParallelLambda flambda,
void* cdata,
int num_task) {
#if !TVM_THREADPOOL_USE_OPENMP
int res = tvm::runtime::ThreadPool::ThreadLocal()->Launch(
flambda, cdata, num_task, 1);
return res;
#else
int num_workers = tvm::runtime::threading::MaxConcurrency();
if (num_task == 0) num_task = num_workers;
omp_set_num_threads(num_workers);
#pragma omp parallel num_threads(num_workers)
{
TVMParallelGroupEnv env;
env.num_task = num_task;
std::atomic<int32_t>* sync_counter = new std::atomic<int>[num_task * tvm::runtime::kSyncStride];
for (int i = 0; i < num_task; ++i) {
sync_counter[i * tvm::runtime::kSyncStride].store(
0, std::memory_order_relaxed);
}
env.sync_handle = sync_counter;
Comment on lines +412 to +417
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part assumes that need_sync==1, which is the de facto default now, but we probably need to keep the same logical behavior as the existing thread pool. Another option is to remove need_sync from the code. @tqchen Do you want to discuss whether we still want need_sync in ThreadPool::Launch()? https://github.com/dmlc/tvm/blob/master/src/runtime/thread_pool.cc#L298

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need_sync is used to generate barriers. If we use OpenMP, perhaps we can ignore it and just use OpenMP's barrier.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

(*flambda)(omp_get_thread_num(), &env, cdata);
}
return 0;
#endif
}

int TVMBackendParallelBarrier(int task_id, TVMParallelGroupEnv* penv) {
#if TVM_THREADPOOL_USE_OPENMP
#pragma omp barrier
#else
using tvm::runtime::kSyncStride;
int num_task = penv->num_task;
std::atomic<int>* sync_counter =
Expand All @@ -415,5 +440,6 @@ int TVMBackendParallelBarrier(int task_id, TVMParallelGroupEnv* penv) {
}
}
std::atomic_thread_fence(std::memory_order_acquire);
#endif
return 0;
}