Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#179 from graphcore/revert-171-resize_…
Browse files Browse the repository at this point in the history
…tensor_inside_run

Revert "Resize tensor inside ipu_backend::Run()"
  • Loading branch information
yiakwy-xpu-ml-framework-team authored Sep 22, 2021
2 parents 4bab0d4 + 9495c38 commit bd28fb7
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 66 deletions.
6 changes: 4 additions & 2 deletions paddle/fluid/framework/ipu/ipu_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,17 @@ void IpuBackend::Compile(ir::Graph* graph,
compiler_->LowerWeights(graph, scope_);
compiler_->LowerBody(graph);
compiler_->InitOutputs(fetch_list);
executor_->SetOutputTensorId(compiler_->GetOutputTensors());
executor_->SetWeights(compiler_->GetWeights());
VLOG(10) << "leave IpuBackend::Compile";
}

void IpuBackend::Run(const framework::ExecutionContext& ctx) {
void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs) {
Prepare();
auto inputs_id = compiler_->GetInputs();
auto outputs_id = compiler_->GetOutputs();
executor_->Run(inputs_id, outputs_id, ctx);
executor_->Run(inputs_id, inputs, outputs_id, outputs);
}

void IpuBackend::Prepare() {
Expand Down
8 changes: 5 additions & 3 deletions paddle/fluid/framework/ipu/ipu_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ limitations under the License. */
#include "paddle/fluid/framework/ipu/ipu_compiler.h"
#include "paddle/fluid/framework/ipu/ipu_executor.h"
#include "paddle/fluid/framework/ipu/ipu_strategy.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/enforce.h"
Expand Down Expand Up @@ -54,11 +53,15 @@ class IpuBackend {
void Compile(ir::Graph *graph, const std::vector<std::string> &feed_list,
const std::vector<std::string> &fetch_list);

// TODO: document Prepare().
void Prepare();

// What Run() includes:
// 1. construct forward onnx graph
// 2. graph-level optimization
// 3. autodiff
void Run(const framework::ExecutionContext &ctx);
void Run(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs);

Executor &GetExecutor() { return *executor_; }

Expand All @@ -75,7 +78,6 @@ class IpuBackend {

private:
int UpperIpuNum();
void Prepare();

private:
std::shared_ptr<Compiler> compiler_;
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/framework/ipu/ipu_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,15 @@ std::vector<int64_t> Compiler::GetTensorShape(const std::string& name) {
return builder_->getTensorShape(tensors_[name]);
}

std::map<std::string, std::string> Compiler::GetOutputTensors() {
std::map<std::string, std::string> outputs;
for (const auto& fetch_name : fetch_list_) {
auto tensorid = tensors_[fetch_name];
outputs[fetch_name] = tensorid;
}
return outputs;
}

// Returns a mutable reference to weights_ (no copy is made).
std::vector<popart::TensorId>& Compiler::GetWeights() {
  return weights_;
}

// Forwards to builder_->getModelProto() and returns its result as a string.
std::string Compiler::GetModelProto() {
  return builder_->getModelProto();
}
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/ipu/ipu_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class Compiler {
std::vector<popart::TensorId> GetOutputs() { return outputs_; }
std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
std::vector<int64_t> GetTensorShape(const std::string &name);
std::map<std::string, std::string> GetOutputTensors();
std::vector<popart::TensorId> &GetWeights();

std::string GetModelProto();
Expand Down
34 changes: 18 additions & 16 deletions paddle/fluid/framework/ipu/ipu_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,9 @@ void Executor::Prepare(const std::string &proto,
}

void Executor::Run(const std::vector<popart::TensorId> &inputs_id,
const std::vector<const Tensor *> &inputs,
const std::vector<popart::TensorId> &outputs_id,
const framework::ExecutionContext &ctx) {
auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
// inputs
const std::vector<Tensor *> &outputs) {
std::map<popart::TensorId, popart::IArray &> popart_inputs;
std::map<popart::TensorId, PaddleIArray> input_wrappers;
for (size_t i = 0; i < inputs.size(); i++) {
Expand All @@ -94,23 +92,12 @@ void Executor::Run(const std::vector<popart::TensorId> &inputs_id,
input_wrappers.emplace(tensor_id, PaddleIArray(tensor));
popart_inputs.emplace(tensor_id, input_wrappers.at(tensor_id));
}
// anchors

std::map<popart::TensorId, popart::IArray &> popart_anchors;
std::map<popart::TensorId, PaddleIArray> anchor_wrappers;
for (size_t i = 0; i < outputs.size(); i++) {
auto tensor_id = outputs_id[i];
auto tensor = const_cast<Tensor *>(outputs[i]);
// get dims & dtype from session
auto fetch_info = session_->getInfo(tensor_id);
auto output_shape = fetch_info.shape();
if (ipu_strategy_->batches_per_step > 1) {
output_shape.insert(output_shape.begin(),
ipu_strategy_->batches_per_step);
}
tensor->Resize(framework::make_ddim(output_shape));
auto fetch_dtype = fetch_info.dataType();
auto paddle_type = PopartType2VarType(fetch_dtype);
tensor->mutable_data(ctx.GetPlace(), paddle_type);
anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor));
popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id));
}
Expand Down Expand Up @@ -204,6 +191,21 @@ void Executor::SetIpuStrategy(const IpuStrategy &strategy) {
ipu_strategy_ = &strategy;
}

void Executor::SetOutputTensorId(
const std::map<std::string, std::string> &outputs) {
outputs_ = outputs;
}

// Returns the shape the session reports for the tensor registered under
// fetch_name. When ipu_strategy_->batches_per_step is greater than 1, that
// value is prepended as an extra leading dimension.
// NOTE(review): outputs_ is a std::map indexed with operator[], so an unknown
// fetch_name is default-inserted and an empty id is passed to getInfo().
// NOTE(review): ipu_strategy_ is dereferenced without a null check; presumably
// SetIpuStrategy() has been called beforehand -- confirm against callers.
std::vector<int64_t> Executor::GetOutputShape(const std::string &fetch_name) {
  auto info = session_->getInfo(outputs_[fetch_name]);
  auto shape = info.shape();
  if (ipu_strategy_->batches_per_step <= 1) {
    return shape;
  }
  shape.insert(shape.begin(), ipu_strategy_->batches_per_step);
  return shape;
}

float Executor::GetLRFromScope() {
auto lr_var = scope_->GetVar(opt_info.GetLRVarName());
auto tensor = lr_var->Get<framework::LoDTensor>();
Expand Down
9 changes: 7 additions & 2 deletions paddle/fluid/framework/ipu/ipu_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ limitations under the License. */
#include "paddle/fluid/framework/ipu/ipu_optimizer.h"
#include "paddle/fluid/framework/ipu/ipu_strategy.h"
#include "paddle/fluid/framework/ipu/ipu_utils.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
Expand All @@ -39,8 +38,9 @@ class Executor {
const std::vector<popart::TensorId> &outputs,
std::shared_ptr<popart::DeviceInfo> device);
void Run(const std::vector<popart::TensorId> &inputs_id,
const std::vector<const Tensor *> &inputs,
const std::vector<popart::TensorId> &outputs_id,
const framework::ExecutionContext &ctx);
const std::vector<Tensor *> &outputs);

// Optimizer
void SetOptimizerType(const std::string &type);
Expand All @@ -61,6 +61,10 @@ class Executor {
// Strategy
void SetIpuStrategy(const IpuStrategy &strategy);

// Outputs
void SetOutputTensorId(const std::map<std::string, std::string> &outputs);
std::vector<int64_t> GetOutputShape(const std::string &fetch_name);

private:
float GetLRFromScope();

Expand All @@ -73,6 +77,7 @@ class Executor {
const IpuStrategy *ipu_strategy_ = nullptr;
popart::WeightsIO weights_io_;
std::vector<popart::TensorId> weights_;
std::map<std::string, std::string> outputs_;
};

} // namespace ipu
Expand Down
36 changes: 2 additions & 34 deletions paddle/fluid/framework/ipu/ipu_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ std::size_t PaddleIArray::nelms() const {

const popart::Shape PaddleIArray::shape() const { return shape_; }

popart::DataType VarType2PopartType(const proto::VarType::Type type) {
popart::DataType VarType2PopartType(proto::VarType::Type type) {
switch (type) {
case proto::VarType::UINT8:
return popart::DataType::UINT8;
Expand Down Expand Up @@ -69,39 +69,7 @@ popart::DataType VarType2PopartType(const proto::VarType::Type type) {
}
}

// Maps a popart::DataType to the corresponding Paddle proto::VarType::Type.
//
// Each supported popart type has exactly one Paddle counterpart (DOUBLE ->
// FP64, FLOAT -> FP32, FLOAT16 -> FP16, BFLOAT16 -> BF16; the remaining types
// keep their name). Any other popart type raises an Unavailable error.
proto::VarType::Type PopartType2VarType(const popart::DataType type) {
  switch (type) {
    case popart::DataType::UINT8:
      return proto::VarType::UINT8;
    case popart::DataType::INT8:
      return proto::VarType::INT8;
    case popart::DataType::INT16:
      return proto::VarType::INT16;
    case popart::DataType::INT32:
      return proto::VarType::INT32;
    case popart::DataType::INT64:
      return proto::VarType::INT64;
    case popart::DataType::BOOL:
      return proto::VarType::BOOL;
    case popart::DataType::DOUBLE:
      return proto::VarType::FP64;
    case popart::DataType::FLOAT:
      return proto::VarType::FP32;
    case popart::DataType::FLOAT16:
      return proto::VarType::FP16;
    case popart::DataType::BFLOAT16:
      return proto::VarType::BF16;
    case popart::DataType::COMPLEX64:
      return proto::VarType::COMPLEX64;
    case popart::DataType::COMPLEX128:
      return proto::VarType::COMPLEX128;
    default:
      // The input here is a popart type, so the message names popart rather
      // than the Paddle var type.
      PADDLE_THROW(paddle::platform::errors::Unavailable(
          "Unsupported popart data type."));
  }
}

popart::DataType OnnxDtype2PopartType(const int type) {
popart::DataType OnnxDtype2PopartType(int type) {
auto dtype = static_cast<ONNXDataType>(type);
switch (dtype) {
case ONNXDataType::BOOL:
Expand Down
5 changes: 2 additions & 3 deletions paddle/fluid/framework/ipu/ipu_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,8 @@ class PaddleIArray final : public popart::IArray {
std::vector<int64_t> shape_;
};

popart::DataType VarType2PopartType(const proto::VarType::Type type);
proto::VarType::Type PopartType2VarType(const popart::DataType type);
popart::DataType OnnxDtype2PopartType(const int type);
popart::DataType VarType2PopartType(proto::VarType::Type type);
popart::DataType OnnxDtype2PopartType(int type);
bool GetBoolEnv(std::string str);

template <typename T>
Expand Down
18 changes: 14 additions & 4 deletions paddle/fluid/operators/ipu_runtime_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,23 @@ class IpuRuntimeKernel : public framework::OpKernel<T> {
ctx.device_context());
ipu_backend->AttachDevice(ipu_ctx.DeviceId());
}

VLOG(4) << "IpuBackend prepare session";
ipu_backend->Prepare();
VLOG(4) << "IpuRuntime Kernel, begin to run graph";
ipu_backend->Run(ctx);

// post-run
auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
auto output_names = ctx.OutputNames("FetchList");
for (size_t i = 0; i < outputs.size(); ++i) {
auto* out = outputs[i];
auto oshape = ipu_backend->GetExecutor().GetOutputShape(output_names[i]);
out->Resize(framework::make_ddim(oshape));
// TODO(alleng) support multi-output dtypes
// maybe get dtype from ipu_backend
out->mutable_data<T>(ctx.GetPlace());
}

ipu_backend->Run(inputs, outputs);

// resize tensor when tensor.dims() is empty
for (size_t i = 0; i < outputs.size(); ++i) {
auto* out = outputs[i];
Expand Down
3 changes: 1 addition & 2 deletions paddle/fluid/operators/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ endif()

cc_library(jit_kernel_helper INTERFACE SRCS ${jit_kernel_cc_srcs} DEPS jit_kernel_base ${JIT_KERNEL_DEPS})
cc_test(jit_kernel_test SRCS test.cc DEPS jit_kernel_helper)
# TODO(alleng) fix error when WITH_IPU
if(NOT WIN32 AND NOT WITH_IPU)
if(NOT WIN32)
cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper device_tracer tensor)
endif()
if(WITH_TESTING AND TEST jit_kernel_test)
Expand Down

0 comments on commit bd28fb7

Please sign in to comment.