Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into normalize-op
LutaoChu committed Aug 14, 2020
2 parents b321656 + ba574c8 commit 935fdf2
Showing 143 changed files with 2,651 additions and 490 deletions.
2 changes: 1 addition & 1 deletion cmake/operators.cmake
@@ -13,7 +13,7 @@ function(op_library TARGET)
   set(CUDNN_FILE)
   set(mkldnn_cc_srcs)
   set(MKLDNN_FILE)
-  set(op_common_deps operator op_registry math_function layer)
+  set(op_common_deps operator op_registry math_function layer common_infer_shape_functions)
   set(options "")
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)
14 changes: 7 additions & 7 deletions paddle/fluid/framework/CMakeLists.txt
@@ -164,23 +164,23 @@ if(WITH_PYTHON)
 if (NOT WIN32)
   add_custom_command(TARGET framework_py_proto POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fleet/proto
-    COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/python/paddle/fleet/proto/__init__.py
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
+    COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/__init__.py
     COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
-    COMMAND cp distributed_strategy_*.py ${PADDLE_BINARY_DIR}/python/paddle/fleet/proto
+    COMMAND cp distributed_strategy_*.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
     COMMENT "Copy generated python proto into directory paddle/fluid/proto."
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 else(NOT WIN32)
   string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/")
-  string(REPLACE "/" "\\" fleet_proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fleet/proto/")
+  string(REPLACE "/" "\\" fleet_proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/")
   add_custom_command(TARGET framework_py_proto POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fleet/proto
-    COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/python/paddle/fleet/proto/__init__.py
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
+    COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/__init__.py
     COMMAND copy /Y *.py ${proto_dstpath}
     COMMAND copy /Y distributed_strategy_*.py ${fleet_proto_dstpath}
     COMMENT "Copy generated python proto into directory paddle/fluid/proto."
-    COMMENT "Copy generated python proto into directory paddle/fleet/proto."
+    COMMENT "Copy generated python proto into directory paddle/distributed/fleet/proto."
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif(NOT WIN32)
 endif()
25 changes: 25 additions & 0 deletions paddle/fluid/framework/op_desc.cc
@@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/op_desc.h"
+
 #include <algorithm>
 #include <functional>
 #include <mutex>  // NOLINT
 #include <string>
 #include <unordered_map>
 #include <utility>
+
 #include "glog/logging.h"
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_call_stack.h"
@@ -51,6 +53,29 @@ class CompileTimeInferShapeContext : public InferShapeContext {

   std::vector<std::string> Outputs(const std::string &name) const override;

+  std::string GetInputNameByIdx(size_t idx) const override {
+    auto &op_proto =
+        paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_;
+    PADDLE_ENFORCE_LT(idx, op_proto->inputs().size(),
+                      platform::errors::OutOfRange(
+                          "The index should be less than the size of inputs of "
+                          "operator %s, but got index is %d and size is %d",
+                          op_.Type(), idx, op_proto->inputs().size()));
+    return op_proto->inputs()[idx].name();
+  }
+
+  std::string GetOutputNameByIdx(size_t idx) const override {
+    auto &op_proto =
+        paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_;
+    PADDLE_ENFORCE_LT(
+        idx, op_proto->outputs().size(),
+        platform::errors::OutOfRange(
+            "The index should be less than the size of outputs of "
+            "operator %s, but got index is %d and size is %d",
+            op_.Type(), idx, op_proto->outputs().size()));
+    return op_proto->outputs()[idx].name();
+  }
+
   void ShareDim(const std::string &in, const std::string &out, size_t i = 0,
                 size_t j = 0) override {
     PADDLE_ENFORCE_LT(i, Inputs(in).size());
27 changes: 26 additions & 1 deletion paddle/fluid/framework/operator.cc
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include "paddle/fluid/framework/operator.h"
+
 #include <gflags/gflags.h>
 #include <glog/logging.h>

@@ -20,13 +22,13 @@ limitations under the License. */
 #include <string>
 #include <unordered_set>
 #include <vector>
+
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/details/nan_inf_utils.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_call_stack.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
-#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/framework/unused_var_check.h"
@@ -604,6 +606,29 @@ class RuntimeInferShapeContext : public InferShapeContext {
     return op_.Outputs(name);
   }

+  std::string GetInputNameByIdx(size_t idx) const override {
+    auto& op_proto =
+        paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_;
+    PADDLE_ENFORCE_LT(idx, op_proto->inputs().size(),
+                      platform::errors::OutOfRange(
+                          "The index should be less than the size of inputs of "
+                          "operator %s, but got index is %d and size is %d",
+                          op_.Type(), idx, op_proto->inputs().size()));
+    return op_proto->inputs()[idx].name();
+  }
+
+  std::string GetOutputNameByIdx(size_t idx) const override {
+    auto& op_proto =
+        paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_;
+    PADDLE_ENFORCE_LT(
+        idx, op_proto->outputs().size(),
+        platform::errors::OutOfRange(
+            "The index should be less than the size of outputs of "
+            "operator %s, but got index is %d and size is %d",
+            op_.Type(), idx, op_proto->outputs().size()));
+    return op_proto->outputs()[idx].name();
+  }
+
   void ShareDim(const std::string& in, const std::string& out, size_t i = 0,
                 size_t j = 0) override {
     auto in_it = ctx_.inputs.find(in);
3 changes: 0 additions & 3 deletions paddle/fluid/framework/operator.h
@@ -64,9 +64,6 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
 /// Variables with this suffix are the new Gradient.
 constexpr char kNewGradSuffix[] = "@NEWGRAD@";

-/// Variables with this suffix are the loaded from pre-train model.
-constexpr char kLoadedVarSuffix[] = "@LOADED";
-
 /// RuntimeContext is used to relate input/output names of Operator with
 /// the corresponding variables in name scope.
 /// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same
4 changes: 3 additions & 1 deletion paddle/fluid/framework/shape_inference.h
@@ -16,6 +16,7 @@ limitations under the License. */

 #include <string>
 #include <vector>
+
 #include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/framework.pb.h"
@@ -52,7 +53,8 @@ class InferShapeContext {
                              const std::vector<DDim> &dims) = 0;
   virtual void SetReaderDims(const std::string &name,
                              const std::vector<DDim> &dims);
-
+  virtual std::string GetInputNameByIdx(size_t idx) const = 0;
+  virtual std::string GetOutputNameByIdx(size_t idx) const = 0;
   virtual AttrReader Attrs() const = 0;
   virtual std::vector<std::string> Inputs(const std::string &name) const = 0;
   virtual std::vector<std::string> Outputs(const std::string &name) const = 0;
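A minimal sketch (not part of this commit) of how an InferShape implementation might use the two new accessors to map positional indices back to proto-declared argument names; the function name and logging are illustrative assumptions:

// Illustrative only: resolve argument names by position, e.g. for diagnostics.
void ExampleInferShape(paddle::framework::InferShapeContext* ctx) {
  std::string in0 = ctx->GetInputNameByIdx(0);    // e.g. "X"
  std::string out0 = ctx->GetOutputNameByIdx(0);  // e.g. "Out"
  VLOG(3) << "infer shape: " << in0 << " -> " << out0;  // assumes glog
}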
6 changes: 3 additions & 3 deletions paddle/fluid/imperative/CMakeLists.txt
@@ -2,10 +2,10 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags)

 cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform)
 cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry)
-cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows selected_rows_functor var_type_traits layer math_function)
+cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows selected_rows_functor var_type_traits layer math_function)
 add_subdirectory(jit)
-
-cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer)
+cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
+cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp)
 cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
 cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
 cc_library(imperative_profiler SRCS profiler.cc)
169 changes: 169 additions & 0 deletions paddle/fluid/imperative/amp_auto_cast.cc
@@ -0,0 +1,169 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/imperative/amp_auto_cast.h"

#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <utility>

#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/imperative/variable_wrapper.h"

namespace paddle {
namespace imperative {

AmpOperators::AmpOperators()
    : allow_ops_(new std::unordered_set<std::string>()),
      block_ops_(new std::unordered_set<std::string>()) {}
AmpOperators::~AmpOperators() {}

AmpOperators& AmpOperators::Instance() {
  static AmpOperators instance;
  return instance;
}

std::shared_ptr<std::unordered_set<std::string>> AmpOperators::GetAllowOps() {
  return allow_ops_;
}

std::shared_ptr<std::unordered_set<std::string>> AmpOperators::GetBlockOps() {
  return block_ops_;
}

inline std::string GetDtypeStr(
    const std::shared_ptr<imperative::VarBase>& var) {
  return framework::DataTypeToString(var->DataType());
}

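// Only FP32/FP16 variables that live on a GPU place are candidates for casting.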
inline bool NeedCast(const std::shared_ptr<VarBase>& var) {
  if (!platform::is_gpu_place(var->Place())) {
    return false;
  }
  if (var->DataType() == framework::proto::VarType::FP32 ||
      var->DataType() == framework::proto::VarType::FP16) {
    return true;
  } else {
    return false;
  }
}

// NOTE: Trace a cast op, so if a var is casted from fp32 to fp16, then the grad
// var will be cast back from fp16 to fp32 during backward phase.
static inline std::shared_ptr<imperative::VarBase> CastToType(
    const std::shared_ptr<VarBase>& var,
    const framework::proto::VarType::Type dst_type) {
  const auto& tracer = imperative::GetCurrentTracer();
  imperative::NameVarBaseMap ins = {{"X", {var}}};
  framework::AttributeMap attrs = {{"in_dtype", var->DataType()},
                                   {"out_dtype", dst_type}};
  auto out = std::shared_ptr<imperative::VarBase>(
      new imperative::VarBase(tracer->GenerateUniqueName()));
  imperative::NameVarBaseMap outs = {{"Out", {out}}};

  {
    AutoCastGuard guard(tracer, false);
    tracer->TraceOp("cast", ins, outs, std::move(attrs));
  }

  return out;
}

static inline std::shared_ptr<imperative::VarBase> CastToFP16(
    const std::shared_ptr<VarBase>& var) {
  auto dst_type = framework::proto::VarType::FP16;
  if (NeedCast(var) && (var->DataType() != dst_type)) {
    return CastToType(var, dst_type);
  }
  return var;
}

static inline std::shared_ptr<imperative::VarBase> CastToFP32(
    const std::shared_ptr<VarBase>& var) {
  auto dst_type = framework::proto::VarType::FP32;
  if (NeedCast(var) && (var->DataType() != dst_type)) {
    return CastToType(var, dst_type);
  }
  return var;
}

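// Promote the op to FP32 if any of its inputs is FP32; otherwise keep FP16.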
static inline framework::proto::VarType::Type GetPromoteType(
    const NameVarBaseMap& ins) {
  auto dst_type = framework::proto::VarType::FP16;
  for (const auto& pair : ins) {
    for (const auto& var : pair.second) {
      if (var->DataType() == framework::proto::VarType::FP32) {
        dst_type = var->DataType();
        break;
      }
    }
  }
  return dst_type;
}

NameVarBaseMap AutoCastInputs(const std::string& op_type,
                              const NameVarBaseMap& ins) {
  NameVarBaseMap new_ins = {};
  if (AmpOperators::Instance().GetAllowOps()->count(op_type)) {
    for (const auto& pair : ins) {
      VLOG(5) << "Op(" << op_type << "): Cast " << pair.first << " from "
              << GetDtypeStr(*pair.second.cbegin()) << " to float16";
      for (const auto& var : pair.second) {
        auto new_var = CastToFP16(var);
        new_ins[pair.first].emplace_back(new_var);
      }
    }
    return new_ins;
  } else if (AmpOperators::Instance().GetBlockOps()->count(op_type)) {
    for (const auto& pair : ins) {
      VLOG(5) << "Op(" << op_type << "): Cast " << pair.first << " from "
              << GetDtypeStr(*pair.second.cbegin()) << " to float";
      for (const auto& var : pair.second) {
        auto new_var = CastToFP32(var);
        new_ins[pair.first].emplace_back(new_var);
      }
    }
    return new_ins;
  } else {
    auto dst_type = GetPromoteType(ins);

    for (const auto& pair : ins) {
      VLOG(5) << "Op(" << op_type << "): Cast " << pair.first << " from "
              << GetDtypeStr(*pair.second.cbegin()) << " to "
              << framework::DataTypeToString(dst_type);
      for (const auto& var : pair.second) {
        // NOTE(zhiqiu): Conv + BN always occur together, we needn't
        // cast X of batch_norm to FP32, which is produced by conv as FP16 type.
        if (op_type == "batch_norm" && pair.first == "X" &&
            dst_type == framework::proto::VarType::FP32) {
          new_ins[pair.first].emplace_back(var);
          continue;
        }
        auto new_var = dst_type == framework::proto::VarType::FP32
                           ? CastToFP32(var)
                           : CastToFP16(var);
        new_ins[pair.first].emplace_back(new_var);
      }
    }
    return new_ins;
  }
  return ins;
}

} // namespace imperative
} // namespace paddle
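A usage sketch (not part of this commit): routing a traced op's inputs through AutoCastInputs before dispatch. The wrapper function and its allow-list registration are illustrative assumptions; AutoCastInputs, AmpOperators, GetCurrentTracer, and TraceOp are taken from this diff:

#include "paddle/fluid/imperative/amp_auto_cast.h"
#include "paddle/fluid/imperative/tracer.h"

// Illustrative only: a trace step that applies AMP casting first.
void TraceWithAmp(const std::string& op_type,
                  const paddle::imperative::NameVarBaseMap& ins,
                  const paddle::imperative::NameVarBaseMap& outs,
                  paddle::framework::AttributeMap attrs) {
  namespace imp = paddle::imperative;
  // Hypothetical registration: mark this op type as safe to run in FP16.
  imp::AmpOperators::Instance().GetAllowOps()->insert(op_type);
  // Cast inputs per the allow/block lists, then trace the op as usual.
  auto amp_ins = imp::AutoCastInputs(op_type, ins);
  imp::GetCurrentTracer()->TraceOp(op_type, amp_ins, outs, std::move(attrs));
}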