From da1fd96d4e1bd6517152b39e3474342c7853b607 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Fri, 16 May 2014 13:26:42 +0200 Subject: [PATCH 1/9] Port extractor to TBB --- CMakeLists.txt | 4 + Extractor/BaseParser.cpp | 2 +- Extractor/PBFParser.cpp | 41 +++-- Extractor/ScriptingEnvironment.cpp | 142 +++++++-------- Extractor/ScriptingEnvironment.h | 13 +- cmake/FindTBB.cmake | 283 +++++++++++++++++++++++++++++ 6 files changed, 392 insertions(+), 93 deletions(-) create mode 100644 cmake/FindTBB.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index cf5a801fa84..9532114c682 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,6 +177,10 @@ target_link_libraries(osrm-datastore ${Boost_LIBRARIES} UUID GITDESCRIPTION COOR find_package(Threads REQUIRED) target_link_libraries(osrm-extract ${CMAKE_THREAD_LIBS_INIT}) +find_package(TBB REQUIRED) +target_link_libraries(osrm-extract ${TBB_LIBRARIES}) +include_directories(${TBB_INCLUDE_DIR}) + find_package(Lua52) if(NOT LUA52_FOUND) find_package(Lua51 REQUIRED) diff --git a/Extractor/BaseParser.cpp b/Extractor/BaseParser.cpp index 64028fca5cc..4ab090f921a 100644 --- a/Extractor/BaseParser.cpp +++ b/Extractor/BaseParser.cpp @@ -42,7 +42,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. BaseParser::BaseParser(ExtractorCallbacks *extractor_callbacks, ScriptingEnvironment &scripting_environment) : extractor_callbacks(extractor_callbacks), - lua_state(scripting_environment.getLuaStateForThreadID(0)), + lua_state(scripting_environment.getLuaState()), scripting_environment(scripting_environment), use_turn_restrictions(true) { ReadUseRestrictionsSetting(); diff --git a/Extractor/PBFParser.cpp b/Extractor/PBFParser.cpp index 8aab15e6765..d1688f66e6e 100644 --- a/Extractor/PBFParser.cpp +++ b/Extractor/PBFParser.cpp @@ -40,6 +40,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "../Util/SimpleLogger.h" #include "../typedefs.h" +#include + #include #include @@ -257,16 +259,18 @@ inline void PBFParser::parseDenseNode(ParserThreadData *thread_data) denseTagIndex += 2; } } -#pragma omp parallel - { - const int thread_num = omp_get_thread_num(); -#pragma omp parallel for schedule(guided) - for (int i = 0; i < number_of_nodes; ++i) + + tbb::parallel_for(tbb::blocked_range(0, extracted_nodes_vector.size()), + [this, &extracted_nodes_vector](const tbb::blocked_range& range) { - ImportNode &import_node = extracted_nodes_vector[i]; - ParseNodeInLua(import_node, scripting_environment.getLuaStateForThreadID(thread_num)); + lua_State* lua_state = this->scripting_environment.getLuaState(); + for (size_t i = range.begin(); i != range.end(); i++) + { + ImportNode &import_node = extracted_nodes_vector[i]; + ParseNodeInLua(import_node, lua_state); + } } - } + ); for (const ImportNode &import_node : extracted_nodes_vector) { @@ -424,16 +428,21 @@ inline void PBFParser::parseWay(ParserThreadData *thread_data) } } -#pragma omp parallel for schedule(guided) - for (int i = 0; i < number_of_ways; ++i) - { - ExtractionWay &extraction_way = parsed_way_vector[i]; - if (2 <= extraction_way.path.size()) + // TODO: investigate if schedule guided will be handled by tbb automatically + tbb::parallel_for(tbb::blocked_range(0, parsed_way_vector.size()), + [this, &parsed_way_vector](const tbb::blocked_range& range) { - ParseWayInLua(extraction_way, - scripting_environment.getLuaStateForThreadID(omp_get_thread_num())); + lua_State* lua_state = this->scripting_environment.getLuaState(); + for (size_t i = range.begin(); i != range.end(); i++) + { + ExtractionWay &extraction_way = parsed_way_vector[i]; + if (2 <= extraction_way.path.size()) + { + ParseWayInLua(extraction_way, lua_state); + } + } } - } + ); for (ExtractionWay &extraction_way : parsed_way_vector) { diff --git a/Extractor/ScriptingEnvironment.cpp b/Extractor/ScriptingEnvironment.cpp index 
a4a7a2d67fd..3379647ffdd 100644 --- a/Extractor/ScriptingEnvironment.cpp +++ b/Extractor/ScriptingEnvironment.cpp @@ -38,87 +38,85 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ScriptingEnvironment::ScriptingEnvironment() {} ScriptingEnvironment::ScriptingEnvironment(const char *file_name) +: file_name(file_name) { SimpleLogger().Write() << "Using script " << file_name; +} - // Create a new lua state - for (int i = 0; i < omp_get_max_threads(); ++i) - { - lua_state_vector.push_back(luaL_newstate()); - } - -// Connect LuaBind to this lua state for all threads -#pragma omp parallel +void ScriptingEnvironment::initLuaState(lua_State* lua_state) +{ + luabind::open(lua_state); + // open utility libraries string library; + luaL_openlibs(lua_state); + + luaAddScriptFolderToLoadPath(lua_state, file_name.c_str()); + + // Add our function to the state's global scope + luabind::module(lua_state)[ + luabind::def("print", LUA_print), + luabind::def("durationIsValid", durationIsValid), + luabind::def("parseDuration", parseDuration) + ]; + + luabind::module(lua_state)[luabind::class_>("keyVals") + .def("Add", &HashTable::Add) + .def("Find", &HashTable::Find) + .def("Holds", &HashTable::Holds)]; + + luabind::module(lua_state)[luabind::class_("Node") + .def(luabind::constructor<>()) + .def_readwrite("lat", &ImportNode::lat) + .def_readwrite("lon", &ImportNode::lon) + .def_readonly("id", &ImportNode::id) + .def_readwrite("bollard", &ImportNode::bollard) + .def_readwrite("traffic_light", &ImportNode::trafficLight) + .def_readwrite("tags", &ImportNode::keyVals)]; + + luabind::module(lua_state) + [luabind::class_("Way") + .def(luabind::constructor<>()) + .def_readonly("id", &ExtractionWay::id) + .def_readwrite("name", &ExtractionWay::name) + .def_readwrite("speed", &ExtractionWay::speed) + .def_readwrite("backward_speed", &ExtractionWay::backward_speed) + .def_readwrite("duration", &ExtractionWay::duration) + .def_readwrite("type", &ExtractionWay::type) + 
.def_readwrite("access", &ExtractionWay::access) + .def_readwrite("roundabout", &ExtractionWay::roundabout) + .def_readwrite("is_access_restricted", &ExtractionWay::isAccessRestricted) + .def_readwrite("ignore_in_grid", &ExtractionWay::ignoreInGrid) + .def_readwrite("tags", &ExtractionWay::keyVals) + .def_readwrite("direction", &ExtractionWay::direction) + .enum_("constants")[ + luabind::value("notSure", 0), + luabind::value("oneway", 1), + luabind::value("bidirectional", 2), + luabind::value("opposite", 3) + ]]; + + // fails on c++11/OS X 10.9 + luabind::module(lua_state)[luabind::class_>("vector").def( + "Add", + static_cast::*)(const std::string &)>( + &std::vector::push_back))]; + + if (0 != luaL_dofile(lua_state, file_name.c_str())) { - lua_State *lua_state = getLuaStateForThreadID(omp_get_thread_num()); - luabind::open(lua_state); - // open utility libraries string library; - luaL_openlibs(lua_state); - - luaAddScriptFolderToLoadPath(lua_state, file_name); - - // Add our function to the state's global scope - luabind::module(lua_state)[ - luabind::def("print", LUA_print), - luabind::def("durationIsValid", durationIsValid), - luabind::def("parseDuration", parseDuration) - ]; - - luabind::module(lua_state)[luabind::class_>("keyVals") - .def("Add", &HashTable::Add) - .def("Find", &HashTable::Find) - .def("Holds", &HashTable::Holds)]; - - luabind::module(lua_state)[luabind::class_("Node") - .def(luabind::constructor<>()) - .def_readwrite("lat", &ImportNode::lat) - .def_readwrite("lon", &ImportNode::lon) - .def_readonly("id", &ImportNode::id) - .def_readwrite("bollard", &ImportNode::bollard) - .def_readwrite("traffic_light", &ImportNode::trafficLight) - .def_readwrite("tags", &ImportNode::keyVals)]; - - luabind::module(lua_state) - [luabind::class_("Way") - .def(luabind::constructor<>()) - .def_readonly("id", &ExtractionWay::id) - .def_readwrite("name", &ExtractionWay::name) - .def_readwrite("speed", &ExtractionWay::speed) - .def_readwrite("backward_speed", 
&ExtractionWay::backward_speed) - .def_readwrite("duration", &ExtractionWay::duration) - .def_readwrite("type", &ExtractionWay::type) - .def_readwrite("access", &ExtractionWay::access) - .def_readwrite("roundabout", &ExtractionWay::roundabout) - .def_readwrite("is_access_restricted", &ExtractionWay::isAccessRestricted) - .def_readwrite("ignore_in_grid", &ExtractionWay::ignoreInGrid) - .def_readwrite("tags", &ExtractionWay::keyVals) - .def_readwrite("direction", &ExtractionWay::direction) - .enum_("constants")[ - luabind::value("notSure", 0), - luabind::value("oneway", 1), - luabind::value("bidirectional", 2), - luabind::value("opposite", 3) - ]]; - - // fails on c++11/OS X 10.9 - luabind::module(lua_state)[luabind::class_>("vector").def( - "Add", - static_cast::*)(const std::string &)>( - &std::vector::push_back))]; - - if (0 != luaL_dofile(lua_state, file_name)) - { - throw OSRMException("ERROR occured in scripting block"); - } + throw OSRMException("ERROR occured in scripting block"); } } -ScriptingEnvironment::~ScriptingEnvironment() +lua_State *ScriptingEnvironment::getLuaState() { - for (unsigned i = 0; i < lua_state_vector.size(); ++i) + bool initialized = false; + auto& ref = script_contexts.local(initialized); + if (!initialized) { - // lua_state_vector[i]; + std::shared_ptr state(luaL_newstate(), lua_close); + ref = state; + initLuaState(ref.get()); } + + return ref.get(); } -lua_State *ScriptingEnvironment::getLuaStateForThreadID(const int id) { return lua_state_vector[id]; } diff --git a/Extractor/ScriptingEnvironment.h b/Extractor/ScriptingEnvironment.h index 632cce9c586..2b1fffead9b 100644 --- a/Extractor/ScriptingEnvironment.h +++ b/Extractor/ScriptingEnvironment.h @@ -28,7 +28,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef SCRIPTINGENVIRONMENT_H_ #define SCRIPTINGENVIRONMENT_H_ -#include +#include +#include +#include struct lua_State; @@ -37,11 +39,14 @@ class ScriptingEnvironment public: ScriptingEnvironment(); explicit ScriptingEnvironment(const char *file_name); - virtual ~ScriptingEnvironment(); - lua_State *getLuaStateForThreadID(const int); + lua_State *getLuaState(); - std::vector lua_state_vector; + private: + void initLuaState(lua_State* lua_state); + + std::string file_name; + tbb::enumerable_thread_specific> script_contexts; }; #endif /* SCRIPTINGENVIRONMENT_H_ */ diff --git a/cmake/FindTBB.cmake b/cmake/FindTBB.cmake new file mode 100644 index 00000000000..f9e3e0f57fc --- /dev/null +++ b/cmake/FindTBB.cmake @@ -0,0 +1,283 @@ +# Locate Intel Threading Building Blocks include paths and libraries +# FindTBB.cmake can be found at https://code.google.com/p/findtbb/ +# Written by Hannes Hofmann +# Improvements by Gino van den Bergen , +# Florian Uhlig , +# Jiri Marsik + +# The MIT License +# +# Copyright (c) 2011 Hannes Hofmann +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# GvdB: This module uses the environment variable TBB_ARCH_PLATFORM which defines architecture and compiler. +# e.g. "ia32/vc8" or "em64t/cc4.1.0_libc2.4_kernel2.6.16.21" +# TBB_ARCH_PLATFORM is set by the build script tbbvars[.bat|.sh|.csh], which can be found +# in the TBB installation directory (TBB_INSTALL_DIR). +# +# GvdB: Mac OS X distribution places libraries directly in lib directory. +# +# For backwards compatibility, you may explicitly set the CMake variables TBB_ARCHITECTURE and TBB_COMPILER. +# TBB_ARCHITECTURE [ ia32 | em64t | itanium ] +# which architecture to use +# TBB_COMPILER e.g. vc9 or cc3.2.3_libc2.3.2_kernel2.4.21 or cc4.0.1_os10.4.9 +# which compiler to use (detected automatically on Windows) + +# This module respects +# TBB_INSTALL_DIR or $ENV{TBB21_INSTALL_DIR} or $ENV{TBB_INSTALL_DIR} + +# This module defines +# TBB_INCLUDE_DIRS, where to find task_scheduler_init.h, etc. +# TBB_LIBRARY_DIRS, where to find libtbb, libtbbmalloc +# TBB_DEBUG_LIBRARY_DIRS, where to find libtbb_debug, libtbbmalloc_debug +# TBB_INSTALL_DIR, the base TBB install directory +# TBB_LIBRARIES, the libraries to link against to use TBB. +# TBB_DEBUG_LIBRARIES, the libraries to link against to use TBB with debug symbols. +# TBB_FOUND, If false, don't try to use TBB.
+# TBB_INTERFACE_VERSION, as defined in tbb/tbb_stddef.h + + +if (WIN32) + # has em64t/vc8 em64t/vc9 + # has ia32/vc7.1 ia32/vc8 ia32/vc9 + set(_TBB_DEFAULT_INSTALL_DIR "C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB") + set(_TBB_LIB_NAME "tbb") + set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc") + set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug") + set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug") + if (MSVC71) + set (_TBB_COMPILER "vc7.1") + endif(MSVC71) + if (MSVC80) + set(_TBB_COMPILER "vc8") + endif(MSVC80) + if (MSVC90) + set(_TBB_COMPILER "vc9") + endif(MSVC90) + if(MSVC10) + set(_TBB_COMPILER "vc10") + endif(MSVC10) + # Todo: add other Windows compilers such as ICL. + set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE}) +endif (WIN32) + +if (UNIX) + if (APPLE) + # MAC + set(_TBB_DEFAULT_INSTALL_DIR "/Library/Frameworks/Intel_TBB.framework/Versions") + # libs: libtbb.dylib, libtbbmalloc.dylib, *_debug + set(_TBB_LIB_NAME "tbb") + set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc") + set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug") + set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug") + # default flavor on apple: ia32/cc4.0.1_os10.4.9 + # Jiri: There is no reason to presume there is only one flavor and + # that user's setting of variables should be ignored. 
+ if(NOT TBB_COMPILER) + set(_TBB_COMPILER "cc4.0.1_os10.4.9") + elseif (NOT TBB_COMPILER) + set(_TBB_COMPILER ${TBB_COMPILER}) + endif(NOT TBB_COMPILER) + if(NOT TBB_ARCHITECTURE) + set(_TBB_ARCHITECTURE "ia32") + elseif(NOT TBB_ARCHITECTURE) + set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE}) + endif(NOT TBB_ARCHITECTURE) + else (APPLE) + # LINUX + set(_TBB_DEFAULT_INSTALL_DIR "/opt/intel/tbb" "/usr/local/include" "/usr/include") + set(_TBB_LIB_NAME "tbb") + set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc") + set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug") + set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug") + # has em64t/cc3.2.3_libc2.3.2_kernel2.4.21 em64t/cc3.3.3_libc2.3.3_kernel2.6.5 em64t/cc3.4.3_libc2.3.4_kernel2.6.9 em64t/cc4.1.0_libc2.4_kernel2.6.16.21 + # has ia32/* + # has itanium/* + set(_TBB_COMPILER ${TBB_COMPILER}) + set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE}) + endif (APPLE) +endif (UNIX) + +if (CMAKE_SYSTEM MATCHES "SunOS.*") +# SUN +# not yet supported +# has em64t/cc3.4.3_kernel5.10 +# has ia32/* +endif (CMAKE_SYSTEM MATCHES "SunOS.*") + + +#-- Clear the public variables +set (TBB_FOUND "NO") + + +#-- Find TBB install dir and set ${_TBB_INSTALL_DIR} and cached ${TBB_INSTALL_DIR} +# first: use CMake variable TBB_INSTALL_DIR +if (TBB_INSTALL_DIR) + set (_TBB_INSTALL_DIR ${TBB_INSTALL_DIR}) +endif (TBB_INSTALL_DIR) +# second: use environment variable +if (NOT _TBB_INSTALL_DIR) + if (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "") + set (_TBB_INSTALL_DIR $ENV{TBB_INSTALL_DIR}) + endif (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "") + # Intel recommends setting TBB21_INSTALL_DIR + if (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "") + set (_TBB_INSTALL_DIR $ENV{TBB21_INSTALL_DIR}) + endif (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "") + if (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "") + set (_TBB_INSTALL_DIR $ENV{TBB22_INSTALL_DIR}) + endif (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "") + if (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "") + set (_TBB_INSTALL_DIR 
$ENV{TBB30_INSTALL_DIR}) + endif (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "") +endif (NOT _TBB_INSTALL_DIR) +# third: try to find path automatically +if (NOT _TBB_INSTALL_DIR) + if (_TBB_DEFAULT_INSTALL_DIR) + set (_TBB_INSTALL_DIR ${_TBB_DEFAULT_INSTALL_DIR}) + endif (_TBB_DEFAULT_INSTALL_DIR) +endif (NOT _TBB_INSTALL_DIR) +# sanity check +if (NOT _TBB_INSTALL_DIR) + message ("ERROR: Unable to find Intel TBB install directory. ${_TBB_INSTALL_DIR}") +else (NOT _TBB_INSTALL_DIR) +# finally: set the cached CMake variable TBB_INSTALL_DIR +if (NOT TBB_INSTALL_DIR) + set (TBB_INSTALL_DIR ${_TBB_INSTALL_DIR} CACHE PATH "Intel TBB install directory") + mark_as_advanced(TBB_INSTALL_DIR) +endif (NOT TBB_INSTALL_DIR) + + +#-- A macro to rewrite the paths of the library. This is necessary, because +# find_library() always found the em64t/vc9 version of the TBB libs +macro(TBB_CORRECT_LIB_DIR var_name) +# if (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t") + string(REPLACE em64t "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}}) +# endif (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t") + string(REPLACE ia32 "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}}) + string(REPLACE vc7.1 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) + string(REPLACE vc8 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) + string(REPLACE vc9 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) + string(REPLACE vc10 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) +endmacro(TBB_CORRECT_LIB_DIR var_content) + + +#-- Look for include directory and set ${TBB_INCLUDE_DIR} +set (TBB_INC_SEARCH_DIR ${_TBB_INSTALL_DIR}/include) +# Jiri: tbbvars now sets the CPATH environment variable to the directory +# containing the headers. 
+find_path(TBB_INCLUDE_DIR + tbb/task_scheduler_init.h + PATHS ${TBB_INC_SEARCH_DIR} ENV CPATH +) +mark_as_advanced(TBB_INCLUDE_DIR) + + +#-- Look for libraries +# GvdB: $ENV{TBB_ARCH_PLATFORM} is set by the build script tbbvars[.bat|.sh|.csh] +if (NOT $ENV{TBB_ARCH_PLATFORM} STREQUAL "") + set (_TBB_LIBRARY_DIR + ${_TBB_INSTALL_DIR}/lib/$ENV{TBB_ARCH_PLATFORM} + ${_TBB_INSTALL_DIR}/$ENV{TBB_ARCH_PLATFORM}/lib + ) +endif (NOT $ENV{TBB_ARCH_PLATFORM} STREQUAL "") +# Jiri: This block isn't mutually exclusive with the previous one +# (hence no else), instead I test if the user really specified +# the variables in question. +if ((NOT ${TBB_ARCHITECTURE} STREQUAL "") AND (NOT ${TBB_COMPILER} STREQUAL "")) + # HH: deprecated + message(STATUS "[Warning] FindTBB.cmake: The use of TBB_ARCHITECTURE and TBB_COMPILER is deprecated and may not be supported in future versions. Please set \$ENV{TBB_ARCH_PLATFORM} (using tbbvars.[bat|csh|sh]).") + # Jiri: It doesn't hurt to look in more places, so I store the hints from + # ENV{TBB_ARCH_PLATFORM} and the TBB_ARCHITECTURE and TBB_COMPILER + # variables and search them both. + set (_TBB_LIBRARY_DIR "${_TBB_INSTALL_DIR}/${_TBB_ARCHITECTURE}/${_TBB_COMPILER}/lib" ${_TBB_LIBRARY_DIR}) +endif ((NOT ${TBB_ARCHITECTURE} STREQUAL "") AND (NOT ${TBB_COMPILER} STREQUAL "")) + +# GvdB: Mac OS X distribution places libraries directly in lib directory. +list(APPEND _TBB_LIBRARY_DIR ${_TBB_INSTALL_DIR}/lib) + +# Jiri: No reason not to check the default paths. From recent versions, +# tbbvars has started exporting the LIBRARY_PATH and LD_LIBRARY_PATH +# variables, which now point to the directories of the lib files. +# It all makes more sense to use the ${_TBB_LIBRARY_DIR} as a HINTS +# argument instead of the implicit PATHS as it isn't hard-coded +# but computed by system introspection. 
Searching the LIBRARY_PATH +# and LD_LIBRARY_PATH environment variables is now even more important +# that tbbvars doesn't export TBB_ARCH_PLATFORM and it facilitates +# the use of TBB built from sources. +find_library(TBB_LIBRARY ${_TBB_LIB_NAME} HINTS ${_TBB_LIBRARY_DIR} + PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) +find_library(TBB_MALLOC_LIBRARY ${_TBB_LIB_MALLOC_NAME} HINTS ${_TBB_LIBRARY_DIR} + PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) + +#Extract path from TBB_LIBRARY name +get_filename_component(TBB_LIBRARY_DIR ${TBB_LIBRARY} PATH) + +#TBB_CORRECT_LIB_DIR(TBB_LIBRARY) +#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY) +mark_as_advanced(TBB_LIBRARY TBB_MALLOC_LIBRARY) + +#-- Look for debug libraries +# Jiri: Changed the same way as for the release libraries. +find_library(TBB_LIBRARY_DEBUG ${_TBB_LIB_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR} + PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) +find_library(TBB_MALLOC_LIBRARY_DEBUG ${_TBB_LIB_MALLOC_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR} + PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) + +# Jiri: Self-built TBB stores the debug libraries in a separate directory. +# Extract path from TBB_LIBRARY_DEBUG name +get_filename_component(TBB_LIBRARY_DEBUG_DIR ${TBB_LIBRARY_DEBUG} PATH) + +#TBB_CORRECT_LIB_DIR(TBB_LIBRARY_DEBUG) +#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY_DEBUG) +mark_as_advanced(TBB_LIBRARY_DEBUG TBB_MALLOC_LIBRARY_DEBUG) + + +if (TBB_INCLUDE_DIR) + if (TBB_LIBRARY) + set (TBB_FOUND "YES") + set (TBB_LIBRARIES ${TBB_LIBRARY} ${TBB_MALLOC_LIBRARY} ${TBB_LIBRARIES}) + set (TBB_DEBUG_LIBRARIES ${TBB_LIBRARY_DEBUG} ${TBB_MALLOC_LIBRARY_DEBUG} ${TBB_DEBUG_LIBRARIES}) + set (TBB_INCLUDE_DIRS ${TBB_INCLUDE_DIR} CACHE PATH "TBB include directory" FORCE) + set (TBB_LIBRARY_DIRS ${TBB_LIBRARY_DIR} CACHE PATH "TBB library directory" FORCE) + # Jiri: Self-built TBB stores the debug libraries in a separate directory. 
+ set (TBB_DEBUG_LIBRARY_DIRS ${TBB_LIBRARY_DEBUG_DIR} CACHE PATH "TBB debug library directory" FORCE) + mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARY_DIRS TBB_DEBUG_LIBRARY_DIRS TBB_LIBRARIES TBB_DEBUG_LIBRARIES) + message(STATUS "Found Intel TBB") + endif (TBB_LIBRARY) +endif (TBB_INCLUDE_DIR) + +if (NOT TBB_FOUND) + message("ERROR: Intel TBB NOT found!") + message(STATUS "Looked for Threading Building Blocks in ${_TBB_INSTALL_DIR}") + # do only throw fatal, if this pkg is REQUIRED + if (TBB_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find TBB library.") + endif (TBB_FIND_REQUIRED) +endif (NOT TBB_FOUND) + +endif (NOT _TBB_INSTALL_DIR) + +if (TBB_FOUND) + set(TBB_INTERFACE_VERSION 0) + FILE(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _TBB_VERSION_CONTENTS) + STRING(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_TBB_VERSION_CONTENTS}") + set(TBB_INTERFACE_VERSION "${TBB_INTERFACE_VERSION}") +endif (TBB_FOUND) From 56d93eb18b914096ad863ee7e495c32d4c8873d2 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Tue, 20 May 2014 23:58:32 +0200 Subject: [PATCH 2/9] Replace omp atomic with std variant --- DataStructures/DynamicGraph.h | 5 ++--- DataStructures/Percent.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/DataStructures/DynamicGraph.h b/DataStructures/DynamicGraph.h index 934086c08c6..142c0b6718f 100644 --- a/DataStructures/DynamicGraph.h +++ b/DataStructures/DynamicGraph.h @@ -38,6 +38,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include template class DynamicGraph { @@ -198,7 +199,6 @@ template class DynamicGraph void DeleteEdge(const NodeIterator source, const EdgeIterator e) { Node &node = m_nodes[source]; -#pragma omp atomic --m_numEdges; --node.edges; BOOST_ASSERT(std::numeric_limits::max() != node.edges); @@ -226,7 +226,6 @@ template class DynamicGraph } } -#pragma omp atomic m_numEdges -= deleted; m_nodes[source].edges -= deleted; @@ -272,7 +271,7 @@ template class DynamicGraph }; NodeIterator m_numNodes; - EdgeIterator m_numEdges; + std::atomic_uint m_numEdges; std::vector m_nodes; DeallocatingVector m_edges; diff --git a/DataStructures/Percent.h b/DataStructures/Percent.h index 6c56ebedfe5..e3fc563cba7 100644 --- a/DataStructures/Percent.h +++ b/DataStructures/Percent.h @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "../Util/OpenMPWrapper.h" #include +#include class Percent { @@ -61,20 +62,18 @@ class Percent void printIncrement() { -#pragma omp atomic ++m_current_value; printStatus(m_current_value); } void printAddition(const unsigned addition) { -#pragma omp atomic m_current_value += addition; printStatus(m_current_value); } private: - unsigned m_current_value; + std::atomic_uint m_current_value; unsigned m_max_value; unsigned m_percent_interval; unsigned m_next_threshold; From 77641a9fcea10119b144c7286cdb32d9835451b6 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Tue, 20 May 2014 23:59:30 +0200 Subject: [PATCH 3/9] Port StaticRTree to use TBB --- DataStructures/StaticRTree.h | 70 ++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/DataStructures/StaticRTree.h b/DataStructures/StaticRTree.h index a1f11908d38..268c583782f 100644 --- a/DataStructures/StaticRTree.h +++ b/DataStructures/StaticRTree.h @@ -48,6 +48,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include + #include #include #include @@ -278,24 +280,33 @@ class StaticRTree HilbertCode get_hilbert_number; -// generate auxiliary vector of hilbert-values -#pragma omp parallel for schedule(guided) - for (uint64_t element_counter = 0; element_counter < m_element_count; ++element_counter) - { - input_wrapper_vector[element_counter].m_array_index = element_counter; - // Get Hilbert-Value for centroid in mercartor projection - DataT const &current_element = input_data_vector[element_counter]; - FixedPointCoordinate current_centroid = - DataT::Centroid(FixedPointCoordinate(coordinate_list.at(current_element.u).lat, - coordinate_list.at(current_element.u).lon), - FixedPointCoordinate(coordinate_list.at(current_element.v).lat, - coordinate_list.at(current_element.v).lon)); - current_centroid.lat = - COORDINATE_PRECISION * lat2y(current_centroid.lat / COORDINATE_PRECISION); - - uint64_t current_hilbert_value = get_hilbert_number(current_centroid); - input_wrapper_vector[element_counter].m_hilbert_value = current_hilbert_value; - } + // generate auxiliary vector of hilbert-values + tbb::parallel_for(tbb::blocked_range(0, m_element_count), + [&input_data_vector, + &input_wrapper_vector, + &get_hilbert_number, + &coordinate_list](const tbb::blocked_range& range) + { + for (uint64_t element_counter = range.begin(); element_counter != range.end(); ++element_counter) + { + WrappedInputElement &current_wrapper = input_wrapper_vector[element_counter]; + current_wrapper.m_array_index = element_counter; + + DataT const &current_element = input_data_vector[element_counter]; + + // Get Hilbert-Value for centroid in mercartor projection + FixedPointCoordinate current_centroid = + DataT::Centroid(FixedPointCoordinate(coordinate_list.at(current_element.u).lat, + coordinate_list.at(current_element.u).lon), + FixedPointCoordinate(coordinate_list.at(current_element.v).lat, + coordinate_list.at(current_element.v).lon)); + current_centroid.lat = + COORDINATE_PRECISION * lat2y(current_centroid.lat
/ COORDINATE_PRECISION); + + current_wrapper.m_hilbert_value = get_hilbert_number(current_centroid); + } + } + ); // open leaf file boost::filesystem::ofstream leaf_node_file(leaf_node_filename, std::ios::binary); @@ -383,17 +394,22 @@ class StaticRTree // reverse and renumber tree to have root at index 0 std::reverse(m_search_tree.begin(), m_search_tree.end()); -#pragma omp parallel for schedule(guided) - for (uint32_t i = 0; i < m_search_tree.size(); ++i) - { - TreeNode &current_tree_node = m_search_tree[i]; - for (uint32_t j = 0; j < current_tree_node.child_count; ++j) + uint32_t search_tree_size = m_search_tree.size(); + tbb::parallel_for(tbb::blocked_range(0, search_tree_size), + [this, &search_tree_size](const tbb::blocked_range& range) { - const uint32_t old_id = current_tree_node.children[j]; - const uint32_t new_id = m_search_tree.size() - old_id - 1; - current_tree_node.children[j] = new_id; + for (uint32_t i = range.begin(); i != range.end(); ++i) + { + TreeNode &current_tree_node = this->m_search_tree[i]; + for (uint32_t j = 0; j < current_tree_node.child_count; ++j) + { + const uint32_t old_id = current_tree_node.children[j]; + const uint32_t new_id = search_tree_size - old_id - 1; + current_tree_node.children[j] = new_id; + } + } } - } + ); // open tree file boost::filesystem::ofstream tree_node_file(tree_node_filename, std::ios::binary); From f487845e9de9de8514d87f0bdfafc2b015ffee9b Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Tue, 20 May 2014 23:57:48 +0200 Subject: [PATCH 4/9] Port Contractor to TBB --- CMakeLists.txt | 1 + Contractor/Contractor.h | 196 ++++++++++++++++++++++++---------------- 2 files changed, 119 insertions(+), 78 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9532114c682..8b62a608229 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,6 +179,7 @@ target_link_libraries(osrm-extract ${CMAKE_THREAD_LIBS_INIT}) find_package(TBB REQUIRED) target_link_libraries(osrm-extract ${TBB_LIBRARIES})
+target_link_libraries(osrm-prepare ${TBB_LIBRARIES}) include_directories(${TBB_INCLUDE_DIR}) find_package(Lua52) diff --git a/Contractor/Contractor.h b/Contractor/Contractor.h index 6ab403cf983..9bf8f8b3fd8 100644 --- a/Contractor/Contractor.h +++ b/Contractor/Contractor.h @@ -38,9 +38,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "../Util/OpenMPWrapper.h" #include "../Util/SimpleLogger.h" #include "../Util/StringUtil.h" +#include "../Util/TimingUtil.h" #include +#include +#include + #include #include #include @@ -125,6 +129,28 @@ class Contractor bool is_independent : 1; }; + + struct ThreadDataContainer + { + ThreadDataContainer(int number_of_nodes) : number_of_nodes(number_of_nodes) {} + + inline ContractorThreadData* getThreadData() + { + bool exists = false; + auto& ref = data.local(exists); + if (!exists) + { + ref = std::make_shared(number_of_nodes); + } + + return ref.get(); + } + + int number_of_nodes; + typedef tbb::enumerable_thread_specific> EnumerableThreadData; + EnumerableThreadData data; + }; + public: template Contractor(int nodes, ContainerT &input_edge_list) { @@ -262,39 +288,51 @@ class Contractor void Run() { + // for the preparation we can use a big grain size, which is much faster (probably cache) + constexpr size_t InitGrainSize = 100000; + constexpr size_t PQGrainSize = 100000; + // auto_partitioner will automatically increase the blocksize if we have + // a lot of data. It is *important* for the last loop iterations + // (which have a very small dataset) that it is divisible.
+ constexpr size_t IndependentGrainSize = 1; + constexpr size_t ContractGrainSize = 1; + constexpr size_t NeighboursGrainSize = 1; + constexpr size_t DeleteGrainSize = 1; + const NodeID number_of_nodes = contractor_graph->GetNumberOfNodes(); Percent p(number_of_nodes); - const unsigned thread_count = omp_get_max_threads(); - std::vector thread_data_list; - for (unsigned thread_id = 0; thread_id < thread_count; ++thread_id) - { - thread_data_list.push_back(new ContractorThreadData(number_of_nodes)); - } - std::cout << "Contractor is using " << thread_count << " threads" << std::endl; + ThreadDataContainer thread_data_list(number_of_nodes); NodeID number_of_contracted_nodes = 0; std::vector remaining_nodes(number_of_nodes); std::vector node_priorities(number_of_nodes); std::vector node_data(number_of_nodes); -// initialize priorities in parallel -#pragma omp parallel for schedule(guided) - for (int x = 0; x < (int)number_of_nodes; ++x) - { - remaining_nodes[x].id = x; - } + + // initialize priorities in parallel + tbb::parallel_for(tbb::blocked_range(0, number_of_nodes, InitGrainSize), + [&remaining_nodes](const tbb::blocked_range& range) + { + for (int x = range.begin(); x != range.end(); ++x) + { + remaining_nodes[x].id = x; + } + } + ); + std::cout << "initializing elimination PQ ..." 
<< std::flush; -#pragma omp parallel - { - ContractorThreadData *data = thread_data_list[omp_get_thread_num()]; -#pragma omp parallel for schedule(guided) - for (int x = 0; x < (int)number_of_nodes; ++x) + tbb::parallel_for(tbb::blocked_range(0, number_of_nodes, PQGrainSize), + [this, &node_priorities, &node_data, &thread_data_list](const tbb::blocked_range& range) { - node_priorities[x] = EvaluateNodePriority(data, &node_data[x], x); + ContractorThreadData *data = thread_data_list.getThreadData(); + for (int x = range.begin(); x != range.end(); ++x) + { + node_priorities[x] = this->EvaluateNodePriority(data, &node_data[x], x); + } } - } + ); std::cout << "ok" << std::endl << "preprocessing " << number_of_nodes << " nodes ..." << std::flush; @@ -309,11 +347,7 @@ class Contractor std::cout << " [flush " << number_of_contracted_nodes << " nodes] " << std::flush; // Delete old heap data to free memory that we need for the coming operations - for (ContractorThreadData *data : thread_data_list) - { - delete data; - } - thread_data_list.clear(); + thread_data_list.data.clear(); // Create new priority array std::vector new_node_priority(remaining_nodes.size()); @@ -396,59 +430,67 @@ class Contractor // INFO: MAKE SURE THIS IS THE LAST OPERATION OF THE FLUSH! 
// reinitialize heaps and ThreadData objects with appropriate size - for (unsigned thread_id = 0; thread_id < thread_count; ++thread_id) - { - thread_data_list.push_back( - new ContractorThreadData(contractor_graph->GetNumberOfNodes())); - } + thread_data_list.number_of_nodes = contractor_graph->GetNumberOfNodes(); } const int last = (int)remaining_nodes.size(); -#pragma omp parallel - { - // determine independent node set - ContractorThreadData *const data = thread_data_list[omp_get_thread_num()]; -#pragma omp for schedule(guided) - for (int i = 0; i < last; ++i) + tbb::parallel_for(tbb::blocked_range(0, last, IndependentGrainSize), + [this, &node_priorities, &remaining_nodes, &thread_data_list](const tbb::blocked_range& range) { - const NodeID node = remaining_nodes[i].id; - remaining_nodes[i].is_independent = - IsNodeIndependent(node_priorities, data, node); + ContractorThreadData *data = thread_data_list.getThreadData(); + // determine independent node set + for (int i = range.begin(); i != range.end(); ++i) + { + const NodeID node = remaining_nodes[i].id; + remaining_nodes[i].is_independent = + this->IsNodeIndependent(node_priorities, data, node); + } } - } + ); + const auto first = stable_partition(remaining_nodes.begin(), remaining_nodes.end(), [](RemainingNodeData node_data) { return !node_data.is_independent; }); const int first_independent_node = first - remaining_nodes.begin(); -// contract independent nodes -#pragma omp parallel - { - ContractorThreadData *data = thread_data_list[omp_get_thread_num()]; -#pragma omp for schedule(guided) nowait - for (int position = first_independent_node; position < last; ++position) + + // contract independent nodes + tbb::parallel_for(tbb::blocked_range(first_independent_node, last, ContractGrainSize), + [this, &remaining_nodes, &thread_data_list](const tbb::blocked_range& range) { - NodeID x = remaining_nodes[position].id; - ContractNode(data, x); + ContractorThreadData *data = thread_data_list.getThreadData(); + for 
(int position = range.begin(); position != range.end(); ++position) + { + NodeID x = remaining_nodes[position].id; + this->ContractNode(data, x); + } } - - std::sort(data->inserted_edges.begin(), data->inserted_edges.end()); - } -#pragma omp parallel - { - ContractorThreadData *data = thread_data_list[omp_get_thread_num()]; -#pragma omp for schedule(guided) nowait - for (int position = first_independent_node; position < last; ++position) + ); + // make sure we really sort each block + tbb::parallel_for(thread_data_list.data.range(), + [&](const ThreadDataContainer::EnumerableThreadData::range_type& range) { - NodeID x = remaining_nodes[position].id; - DeleteIncomingEdges(data, x); + for (auto& data : range) + std::sort(data->inserted_edges.begin(), + data->inserted_edges.end()); } - } + ); + tbb::parallel_for(tbb::blocked_range(first_independent_node, last, DeleteGrainSize), + [this, &remaining_nodes, &thread_data_list](const tbb::blocked_range& range) + { + ContractorThreadData *data = thread_data_list.getThreadData(); + for (int position = range.begin(); position != range.end(); ++position) + { + NodeID x = remaining_nodes[position].id; + this->DeleteIncomingEdges(data, x); + } + } + ); + // insert new edges - for (unsigned thread_id = 0; thread_id < thread_count; ++thread_id) + for (auto& data : thread_data_list.data) { - ContractorThreadData &data = *thread_data_list[thread_id]; - for (const ContractorEdge &edge : data.inserted_edges) + for (const ContractorEdge &edge : data->inserted_edges) { auto current_edge_ID = contractor_graph->FindEdge(edge.source, edge.target); if (current_edge_ID < contractor_graph->EndEdges(edge.source)) @@ -466,19 +508,21 @@ class Contractor } contractor_graph->InsertEdge(edge.source, edge.target, edge.data); } - data.inserted_edges.clear(); + data->inserted_edges.clear(); } -// update priorities -#pragma omp parallel - { - ContractorThreadData *data = thread_data_list[omp_get_thread_num()]; -#pragma omp for schedule(guided) nowait - 
for (int position = first_independent_node; position < last; ++position) + + tbb::parallel_for(tbb::blocked_range(first_independent_node, last, NeighboursGrainSize), + [this, &remaining_nodes, &node_priorities, &node_data, &thread_data_list](const tbb::blocked_range& range) { - NodeID x = remaining_nodes[position].id; - UpdateNodeNeighbours(node_priorities, node_data, data, x); + ContractorThreadData *data = thread_data_list.getThreadData(); + for (int position = range.begin(); position != range.end(); ++position) + { + NodeID x = remaining_nodes[position].id; + this->UpdateNodeNeighbours(node_priorities, node_data, data, x); + } } - } + ); + // remove contracted nodes from the pool number_of_contracted_nodes += last - first_independent_node; remaining_nodes.resize(first_independent_node); @@ -510,11 +554,8 @@ class Contractor p.printStatus(number_of_contracted_nodes); } - for (ContractorThreadData *data : thread_data_list) - { - delete data; - } - thread_data_list.clear(); + + thread_data_list.data.clear(); } template inline void GetEdges(DeallocatingVector &edges) @@ -769,7 +810,6 @@ class Contractor true, true, false); - ; inserted_edges.push_back(new_edge); std::swap(new_edge.source, new_edge.target); new_edge.data.forward = false; From bbc04245632045245f79613cbdab929fa9bfd95d Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Wed, 21 May 2014 00:01:02 +0200 Subject: [PATCH 5/9] Set number of threads in TBB --- prepare.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/prepare.cpp b/prepare.cpp index 267e22e5b0d..9f6916d95ec 100644 --- a/prepare.cpp +++ b/prepare.cpp @@ -49,11 +49,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include #include +#include + typedef QueryEdge::EdgeData EdgeData; typedef DynamicGraph::InputEdge InputEdge; typedef StaticGraph::InputEdge StaticEdge; @@ -66,6 +69,8 @@ std::vector edge_list; int main(int argc, char *argv[]) { + + try { LogPolicy::GetInstance().Unmute(); @@ -73,7 +78,7 @@ int main(int argc, char *argv[]) std::chrono::steady_clock::now(); boost::filesystem::path config_file_path, input_path, restrictions_path, profile_path; - int requested_num_threads; + unsigned int requested_num_threads; // declare a group of options that will be allowed only on command line boost::program_options::options_description generic_options("Options"); @@ -95,7 +100,7 @@ int main(int argc, char *argv[]) ->default_value("profile.lua"), "Path to LUA routing profile")( "threads,t", - boost::program_options::value(&requested_num_threads)->default_value(8), + boost::program_options::value(&requested_num_threads)->default_value(8), "Number of threads to use"); // hidden options, will be allowed both on command line and in config file, but will not be @@ -174,7 +179,8 @@ int main(int argc, char *argv[]) return 1; } - int real_num_threads = std::min(omp_get_num_procs(), requested_num_threads); + unsigned int hardware_threads = std::max((unsigned int) 1, std::thread::hardware_concurrency()); + unsigned int real_num_threads = std::min(hardware_threads, requested_num_threads); SimpleLogger().Write() << "Input file: " << input_path.filename().string(); SimpleLogger().Write() << "Restrictions file: " << restrictions_path.filename().string(); @@ -182,7 +188,8 @@ int main(int argc, char *argv[]) SimpleLogger().Write() << "Threads: " << real_num_threads << " (requested " << requested_num_threads << ")"; - omp_set_num_threads(real_num_threads); + tbb::task_scheduler_init init(real_num_threads); + LogPolicy::GetInstance().Unmute(); boost::filesystem::ifstream restriction_stream(restrictions_path, std::ios::binary); TurnRestriction restriction; From 
a21fb5fc89325215d5d2699840a0c77845abf16d Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Tue, 20 May 2014 00:18:37 +0200 Subject: [PATCH 6/9] Use append operator instead of function, because function is inplace. --- Contractor/TemporaryStorage.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Contractor/TemporaryStorage.cpp b/Contractor/TemporaryStorage.cpp index 5301f11b46b..cdd8536429b 100644 --- a/Contractor/TemporaryStorage.cpp +++ b/Contractor/TemporaryStorage.cpp @@ -28,8 +28,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "TemporaryStorage.h" StreamData::StreamData() - : write_mode(true), temp_path(boost::filesystem::unique_path(temp_directory.append( - TemporaryFilePattern.begin(), TemporaryFilePattern.end()))), + : write_mode(true), temp_path(boost::filesystem::unique_path(temp_directory / TemporaryFilePattern)), temp_file(new boost::filesystem::fstream( temp_path, std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary)), readWriteMutex(std::make_shared()) From f0b403bc2e7086bce60546f4670ea3e8a92bdd93 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Wed, 21 May 2014 00:09:57 +0200 Subject: [PATCH 7/9] Set requested threads in TBB --- extractor.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/extractor.cpp b/extractor.cpp index b5fadb5383e..e27ccb097e1 100644 --- a/extractor.cpp +++ b/extractor.cpp @@ -42,12 +42,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include #include #include +#include + ExtractorCallbacks *extractor_callbacks; UUID uuid; @@ -60,7 +63,7 @@ int main(int argc, char *argv[]) std::chrono::steady_clock::now(); boost::filesystem::path config_file_path, input_path, profile_path; - int requested_num_threads; + unsigned int requested_num_threads; // declare a group of options that will be allowed only on command line boost::program_options::options_description generic_options("Options"); @@ -78,7 +81,7 @@ int main(int argc, char *argv[]) &profile_path)->default_value("profile.lua"), "Path to LUA routing profile")( "threads,t", - boost::program_options::value(&requested_num_threads)->default_value(8), + boost::program_options::value(&requested_num_threads)->default_value(8), "Number of threads to use"); // hidden options, will be allowed both on command line and in config file, but will not be @@ -163,18 +166,19 @@ int main(int argc, char *argv[]) return 1; } - int real_num_threads = std::min(omp_get_num_procs(), requested_num_threads); + unsigned int hardware_threads = std::max((unsigned int) 1, std::thread::hardware_concurrency()); + unsigned int real_num_threads = std::min(hardware_threads, requested_num_threads); SimpleLogger().Write() << "Input file: " << input_path.filename().string(); SimpleLogger().Write() << "Profile: " << profile_path.filename().string(); SimpleLogger().Write() << "Threads: " << real_num_threads << " (requested " << requested_num_threads << ")"; + tbb::task_scheduler_init init(real_num_threads); + /*** Setup Scripting Environment ***/ ScriptingEnvironment scripting_environment(profile_path.c_str()); - omp_set_num_threads(real_num_threads); - bool file_has_pbf_format(false); std::string output_file_name = input_path.string(); std::string restriction_fileName = input_path.string(); From bef113001a7dd625c90eb60ef1d42326ae9ff035 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Wed, 21 May 2014 21:31:36 +0200 Subject: [PATCH 8/9] Add TBB to 
travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d33a23d0bef..8817e387471 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ install: - sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test - sudo add-apt-repository -y ppa:boost-latest/ppa - sudo apt-get update >/dev/null - - sudo apt-get -q install libprotoc-dev libprotobuf7 libprotobuf-dev libosmpbf-dev libbz2-dev libstxxl-dev libstxxl1 libxml2-dev libzip-dev lua5.1 liblua5.1-0-dev rubygems + - sudo apt-get -q install libprotoc-dev libprotobuf7 libprotobuf-dev libosmpbf-dev libbz2-dev libstxxl-dev libstxxl1 libxml2-dev libzip-dev lua5.1 liblua5.1-0-dev rubygems libtbb-dev - sudo apt-get -q install g++-4.7 - sudo apt-get install libboost1.54-all-dev #luabind From e2daf5c2fc988198e0d89df15e572e2904d401ac Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Wed, 21 May 2014 21:48:53 +0200 Subject: [PATCH 9/9] Make some temporary variables const --- Contractor/Contractor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Contractor/Contractor.h b/Contractor/Contractor.h index 9bf8f8b3fd8..e0344c1f54b 100644 --- a/Contractor/Contractor.h +++ b/Contractor/Contractor.h @@ -461,7 +461,7 @@ class Contractor ContractorThreadData *data = thread_data_list.getThreadData(); for (int position = range.begin(); position != range.end(); ++position) { - NodeID x = remaining_nodes[position].id; + const NodeID x = remaining_nodes[position].id; this->ContractNode(data, x); } } @@ -481,7 +481,7 @@ class Contractor ContractorThreadData *data = thread_data_list.getThreadData(); for (int position = range.begin(); position != range.end(); ++position) { - NodeID x = remaining_nodes[position].id; + const NodeID x = remaining_nodes[position].id; this->DeleteIncomingEdges(data, x); } } @@ -492,7 +492,7 @@ class Contractor { for (const ContractorEdge &edge : data->inserted_edges) { - auto current_edge_ID =
contractor_graph->FindEdge(edge.source, edge.target); + const EdgeID current_edge_ID = contractor_graph->FindEdge(edge.source, edge.target); if (current_edge_ID < contractor_graph->EndEdges(edge.source)) { ContractorGraph::EdgeData &current_data =