Commit 8f250e1

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-dy-decode

LiuChiachi committed Oct 13, 2020
2 parents 6d7345e + 1b48f2f
Showing 128 changed files with 6,344 additions and 1,867 deletions.
9 changes: 8 additions & 1 deletion .pre-commit-config.yaml
@@ -48,5 +48,12 @@ repos:
        name: copyright_checker
        entry: python ./tools/codestyle/copyright.hook
        language: system
-       files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+       files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$
        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
+-   repo: local
+    hooks:
+    -   id: shellcheck
+        name: shellcheck
+        entry: shellcheck
+        language: system
+        files: .sh$
92 changes: 68 additions & 24 deletions cmake/external/lite.cmake
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-if(NOT LINUX OR NOT WITH_MKL)
-  message("Paddle-lite will not build because the required Linux and MKL do not exist.")
+if(NOT LINUX)
+  message("Paddle-lite will not build because it requires Linux.")
set(WITH_LITE OFF)
return()
endif()
@@ -42,30 +42,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
endif()

# No quotes, so cmake can resolve it as a command with arguments.
-  set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
-  set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-                         -DLITE_WITH_CUDA=${WITH_GPU}
-                         -DWITH_MKLDNN=OFF
-                         -DLITE_WITH_X86=ON
-                         -DLITE_WITH_PROFILE=OFF
-                         -DWITH_LITE=OFF
-                         -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-                         -DWITH_PYTHON=OFF
-                         -DWITH_TESTING=OFF
-                         -DLITE_BUILD_EXTRA=ON
-                         -DCUDNN_ROOT=${CUDNN_ROOT}
-                         -DLITE_WITH_STATIC_CUDA=OFF
-                         -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-                         -DLITE_WITH_XPU=${LITE_WITH_XPU}
-                         -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-                         -DLITE_WITH_ARM=OFF)
-
-  ExternalProject_Add(
+  if(WITH_ARM)
+    set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
+    message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}")
+    set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF
+                           -DLITE_WITH_CUDA=OFF
+                           -DWITH_MKLDNN=OFF
+                           -DLITE_WITH_X86=OFF
+                           -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
+                           -DLITE_WITH_PROFILE=OFF
+                           -DARM_TARGET_OS=armlinux
+                           -DWITH_LITE=ON
+                           -DWITH_PYTHON=OFF
+                           -DWITH_TESTING=OFF
+                           -DLITE_BUILD_EXTRA=ON
+                           -DLITE_WITH_XPU=${LITE_WITH_XPU}
+                           -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+                           -DLITE_WITH_ARM=ON)
+    ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
+      PATCH_COMMAND mkdir -p ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code && touch ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code/__generated_code__.cc
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
@@ -81,7 +81,51 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
-  )
+    )
+    set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
+  else()
+    set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
+    set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
+    set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
+                           -DLITE_WITH_CUDA=${WITH_GPU}
+                           -DWITH_MKLDNN=OFF
+                           -DLITE_WITH_X86=ON
+                           -DLITE_WITH_PROFILE=OFF
+                           -DWITH_LITE=OFF
+                           -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
+                           -DWITH_PYTHON=OFF
+                           -DWITH_TESTING=OFF
+                           -DLITE_BUILD_EXTRA=ON
+                           -DCUDNN_ROOT=${CUDNN_ROOT}
+                           -DLITE_WITH_STATIC_CUDA=OFF
+                           -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
+                           -DLITE_WITH_XPU=${LITE_WITH_XPU}
+                           -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+                           -DLITE_WITH_ARM=OFF)
+
+    ExternalProject_Add(
+      ${LITE_PROJECT}
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
+      GIT_TAG ${LITE_GIT_TAG}
+      PREFIX ${LITE_SOURCES_DIR}
+      UPDATE_COMMAND ""
+      BUILD_COMMAND ${LITE_BUILD_COMMAND}
+      INSTALL_COMMAND ""
+      CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+                 -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+                 -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS}
+                 -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+                 -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+                 -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                 -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+                 -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+                 -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+                 -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+                 ${EXTERNAL_OPTIONAL_ARGS}
+                 ${LITE_OPTIONAL_ARGS}
+    )
+  endif()
ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR})
@@ -103,8 +103,8 @@ function(external_lite_libs alias path)
endif()
endfunction()

-external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so)
-set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so)
+external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
+set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)

add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG)
2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
@@ -131,7 +131,7 @@ function(copy_part_of_thrid_party TARGET DST)
if (LITE_BINARY_DIR)
set(dst_dir "${DST}/third_party/install/lite")
copy(${TARGET}
-         SRCS ${LITE_BINARY_DIR}/inference_lite_lib/*
+         SRCS ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/*
DSTS ${dst_dir})
endif()
endfunction()
1 change: 1 addition & 0 deletions paddle/fluid/framework/distributed_strategy.proto
@@ -98,6 +98,7 @@ message AsyncConfig {
optional int32 send_wait_times = 7 [ default = 1 ];
optional bool runtime_split_send_recv = 8 [ default = false ];
optional bool launch_barrier = 9 [ default = true ];
+  optional string heter_worker_device_guard = 10 [ default = 'cpu' ];
}

message PipelineConfig { optional int32 micro_batch = 1 [ default = 1 ]; }
9 changes: 9 additions & 0 deletions paddle/fluid/framework/tensor_util.cc
@@ -13,11 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/tensor_util.h"
+
#include <algorithm>
#include <limits>
#include <memory>
+#include <string>
#include <utility>
#include <vector>
+
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/profiler.h"

@@ -943,6 +946,12 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst) {
#endif
}

+template <typename T>
+std::string format_tensor(const framework::Tensor& tensor) {
+  // TODO(zhiqiu): use the print option to format tensor.
+  return "NOT IMPLEMENTED";
+}
+
template <typename T>
std::ostream& print_tensor(std::ostream& os, const framework::Tensor& tensor) {
auto inspect = tensor.data<T>();
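The format_tensor stub above only reserves an API; the body is still the TODO. For orientation, a minimal sketch of what a formatter driven by the PrintOptions fields introduced in tensor_util.h below might look like — FormatValues and its signature are illustrative assumptions, not Paddle APIs:

```cpp
#include <cstddef>
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>

// Illustrative helper: formats a flat buffer of values, honoring
// precision/sci_mode settings like the PrintOptions fields below.
template <typename T>
std::string FormatValues(const std::vector<T>& values, int precision,
                         bool sci_mode) {
  std::ostringstream os;
  os << std::setprecision(precision);
  if (sci_mode) os << std::scientific;
  for (std::size_t i = 0; i < values.size(); ++i) {
    if (i > 0) os << ", ";
    os << values[i];
  }
  return os.str();
}
```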
20 changes: 20 additions & 0 deletions paddle/fluid/framework/tensor_util.h
@@ -25,6 +25,26 @@ limitations under the License. */
namespace paddle {
namespace framework {

+class PrintOptions {
+ public:
+  static PrintOptions& Instance() {
+    static PrintOptions instance;
+    return instance;
+  }
+  ~PrintOptions() {}
+  PrintOptions(const PrintOptions& o) = delete;
+  const PrintOptions& operator=(const PrintOptions& o) = delete;
+
+  int precision = 8;
+  int threshold = 1000;
+  int edgeitems = 3;
+  int linewidth = 75;
+  bool sci_mode = false;
+
+ private:
+  PrintOptions() {}
+};
+
// NOTE(zcd): Because TensorCopy is an async operation, when the src_place
// and dst_place are two different GPU, to ensure that the operation can
// be carried out correctly, there is a src_ctx wait operation in TensorCopy.
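PrintOptions::Instance() is a Meyers singleton: the function-local static is constructed once (thread-safe since C++11), and the deleted copy constructor and assignment keep the options process-wide. A minimal usage sketch (ConfigureTensorPrinting is an illustrative caller, not part of the commit):

```cpp
#include "paddle/fluid/framework/tensor_util.h"

// Tighten the global tensor-printing settings via the new singleton.
void ConfigureTensorPrinting() {
  auto& opts = paddle::framework::PrintOptions::Instance();
  opts.precision = 4;    // print 4 significant digits per element
  opts.sci_mode = true;  // use scientific notation
}
```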
8 changes: 5 additions & 3 deletions paddle/fluid/imperative/tests/test_tracer.cc
@@ -17,10 +17,12 @@
//

#include <paddle/fluid/framework/op_registry.h>
+
#include <memory>
#include <set>
#include <string>
#include <vector>
+
#include "gtest/gtest.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/tracer.h"
@@ -286,9 +288,9 @@ TEST(test_tracer, test_unique_name_generator) {
ASSERT_STREQ("fc_1", fc_2.c_str());
// use `eager_tmp` as key if not specify it.
auto tmp_var_2 = tracer.GenerateUniqueName();
ASSERT_STREQ("eager_tmp_2", tmp_var_2.c_str());
auto tmp_var_3 = tracer.GenerateUniqueName("eager_tmp");
ASSERT_STREQ("eager_tmp_3", tmp_var_3.c_str());
ASSERT_STREQ("dygraph_tmp_2", tmp_var_2.c_str());
auto tmp_var_3 = tracer.GenerateUniqueName("dygraph_tmp");
ASSERT_STREQ("dygraph_tmp_3", tmp_var_3.c_str());
}

TEST(test_tracer, test_current_tracer) {
5 changes: 3 additions & 2 deletions paddle/fluid/imperative/tracer.h
@@ -20,6 +20,7 @@
#include <string>
#include <unordered_map>
#include <vector>
+
#include "ThreadPool.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
@@ -32,7 +33,7 @@ namespace imperative {
class UniqueNameGenerator {
public:
explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {}
std::string Generate(std::string key = "eager_tmp") {
std::string Generate(std::string key = "dygraph_tmp") {
return prefix_ + key + "_" + std::to_string(id_++);
}

@@ -83,7 +84,7 @@ class Tracer {
// name like `tmp_0` in some cases when transform dygraph into static layers.
// So we modify the default prefix key into `eager_tmp` to distinguish with
// static graph.
std::string GenerateUniqueName(std::string key = "eager_tmp") {
std::string GenerateUniqueName(std::string key = "dygraph_tmp") {
return generator_->Generate(key);
}

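The default prefix key moves from `eager_tmp` to `dygraph_tmp`, matching the test update in test_tracer.cc above. A standalone mirror of UniqueNameGenerator to show the resulting names (illustrative copy, not linked against Paddle):

```cpp
#include <atomic>
#include <iostream>
#include <string>

// Standalone mirror of imperative::UniqueNameGenerator, for illustration.
class UniqueNameGenerator {
 public:
  explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {}
  std::string Generate(std::string key = "dygraph_tmp") {
    return prefix_ + key + "_" + std::to_string(id_++);
  }

 private:
  std::atomic<int> id_{0};
  std::string prefix_;
};

int main() {
  UniqueNameGenerator gen;
  std::cout << gen.Generate() << "\n";      // dygraph_tmp_0
  std::cout << gen.Generate() << "\n";      // dygraph_tmp_1
  std::cout << gen.Generate("fc") << "\n";  // fc_2 (one shared counter)
}
```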
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -178,6 +178,10 @@ struct Argument {

// Scales for variables to be quantized
DECL_ARGUMENT_FIELD(quant_var_scales, QuantVarScales, VarQuantScale);
+
+  // A set of op types to enable their bfloat16 kernels
+  DECL_ARGUMENT_FIELD(bfloat16_enabled_op_types, Bfloat16EnabledOpTypes,
+                      std::unordered_set<std::string>);
#endif

// Passed from config.
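DECL_ARGUMENT_FIELD generates the field plus accessors; roughly sketched (a simplification — the real macro in argument.h also tracks whether the field has been set), the declaration above yields what lets analysis_predictor.cc below call argument_.SetBfloat16EnabledOpTypes(...):

```cpp
#include <string>
#include <unordered_set>

// Simplified sketch of what
//   DECL_ARGUMENT_FIELD(bfloat16_enabled_op_types, Bfloat16EnabledOpTypes,
//                       std::unordered_set<std::string>)
// roughly provides on struct Argument.
struct ArgumentSketch {
  std::unordered_set<std::string>& bfloat16_enabled_op_types() {
    return bfloat16_enabled_op_types_;
  }
  void SetBfloat16EnabledOpTypes(
      const std::unordered_set<std::string>& types) {
    bfloat16_enabled_op_types_ = types;
  }

 private:
  std::unordered_set<std::string> bfloat16_enabled_op_types_;
};
```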
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
@@ -252,7 +252,11 @@ void LiteSubgraphPass::SetUpEngine(
} else if (use_xpu) {
target_type = TARGET(kXPU);
} else {
+#ifdef PADDLE_WITH_ARM
+    target_type = TARGET(kARM);
+#else
target_type = TARGET(kX86);
+#endif
}

paddle::lite_api::PrecisionType precision_type =
5 changes: 5 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -125,6 +125,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_cache_capacity_);
+  // Bfloat16 related.
+  CP_MEMBER(use_mkldnn_bfloat16_);
+  CP_MEMBER(bfloat16_enabled_op_types_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -417,6 +420,8 @@ std::string AnalysisConfig::SerializeInfoCache() {

ss << use_mkldnn_quantizer_;
ss << use_mkldnn_bfloat16_;
for (auto &item : bfloat16_enabled_op_types_) ss << item;
ss << ";";
ss << model_from_memory_;

ss << with_profile_;
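Copying the two new members in the copy constructor and folding the op-type set into SerializeInfoCache keeps cloned configs and their cache keys in sync with the bfloat16 state. A usage sketch (op names are examples; EnableMKLDNN is the existing MKL-DNN toggle):

```cpp
#include <unordered_set>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void CopyKeepsBfloat16State() {
  paddle::AnalysisConfig a;
  a.EnableMKLDNN();
  a.SetBfloat16Op({"conv2d", "fc"});  // example op types
  // The copy constructor runs the CP_MEMBER lines above, so `b` carries the
  // same bfloat16 fields and SerializeInfoCache() yields the same key.
  paddle::AnalysisConfig b(a);
  (void)b;
}
```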
4 changes: 4 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -501,6 +501,10 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetQuantizeExcludedOpIds(
config_.mkldnn_quantizer_config()->excluded_op_ids());
}
+  if (config_.use_mkldnn_bfloat16_) {
+    LOG(INFO) << "Bfloat16 is enabled";
+    argument_.SetBfloat16EnabledOpTypes(config_.bfloat16_enabled_op_types_);
+  }
#endif

auto passes = config_.pass_builder()->AllPasses();
17 changes: 11 additions & 6 deletions paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -47,19 +47,24 @@ if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
if (MSVC_STATIC_CRT)
-    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
-    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
+    if (WITH_MKL)
+      set(FLAG_OPENMP "/openmp")
+    endif()
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
safe_set_static_flag()
if (WITH_STATIC_LIB)
add_definitions(-DSTATIC_LIB)
endif()
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(WITH_MKL)
set(FLAG_OPENMP "-fopenmp")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
endif()
message("flags" ${CMAKE_CXX_FLAGS})

if(WITH_GPU)
if(NOT WIN32)
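MKL parallelizes via an OpenMP runtime, so demos built WITH_MKL now get /openmp (MSVC) or -fopenmp (GCC/Clang); presumably this keeps the demo's compiler mode consistent with the runtime MKL expects. For reference, a generic OpenMP snippet of the kind the flag enables (not Paddle-specific):

```cpp
#include <omp.h>

#include <cstdio>

int main() {
  // This only builds in OpenMP mode, i.e. with /openmp or -fopenmp -- the
  // same mode the demo flags above turn on when MKL is used.
#pragma omp parallel
  { std::printf("thread %d\n", omp_get_thread_num()); }
  return 0;
}
```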
9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -414,6 +414,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }

+  /// \brief Specify the operator type list to use Bfloat16 acceleration.
+  ///
+  /// \param op_list The operator type list.
+  ///
+  void SetBfloat16Op(std::unordered_set<std::string> op_list) {
+    bfloat16_enabled_op_types_ = op_list;
+  }
+
///
/// \brief A boolean state telling whether the thread local CUDA stream is
/// enabled.
@@ -606,6 +614,7 @@ struct PD_INFER_DECL AnalysisConfig {
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
bool use_mkldnn_bfloat16_{false};
+  std::unordered_set<std::string> bfloat16_enabled_op_types_;

// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
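A minimal call sequence for the new setter — a sketch under the assumption that EnableMkldnnBfloat16() is the existing toggle behind the mkldnn_bfloat16_enabled() getter above, with example op types:

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void EnableBf16ForSelectedOps() {
  paddle::AnalysisConfig config;
  config.EnableMKLDNN();
  // Assumption: EnableMkldnnBfloat16() flips use_mkldnn_bfloat16_, which
  // AnalysisPredictor::PrepareArgument() checks before forwarding the list.
  config.EnableMkldnnBfloat16();
  config.SetBfloat16Op({"conv2d", "pool2d"});  // example op types
}
```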
1 change: 1 addition & 0 deletions paddle/fluid/inference/lite/tensor_utils.cc
@@ -46,6 +46,7 @@ platform::Place GetNativePlace(const TargetType& type, int id = 0) {
switch (type) {
case TargetType::kHost:
case TargetType::kX86:
+    case TargetType::kARM:
return platform::CPUPlace();
case TargetType::kCUDA:
return platform::CUDAPlace(id);