Merge pull request PaddlePaddle#179 from graphcore/revert-171-resize_tensor_inside_run

Revert "Resize tensor inside ipu_backend::Run()"
graphcore · Sep 22, 2021 · bd28fb7 · bd28fb7
2 parents 4bab0d4 + 9495c38
commit bd28fb7
Show file tree

Hide file tree

Showing 10 changed files with 63 additions and 66 deletions.
diff --git a/paddle/fluid/framework/ipu/ipu_backend.cc b/paddle/fluid/framework/ipu/ipu_backend.cc
@@ -59,15 +59,17 @@ void IpuBackend::Compile(ir::Graph* graph,
   compiler_->LowerWeights(graph, scope_);
   compiler_->LowerBody(graph);
   compiler_->InitOutputs(fetch_list);
+  executor_->SetOutputTensorId(compiler_->GetOutputTensors());
   executor_->SetWeights(compiler_->GetWeights());
   VLOG(10) << "leave IpuBackend::Compile";
 }
 
-void IpuBackend::Run(const framework::ExecutionContext& ctx) {
+void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
+                     const std::vector<Tensor*>& outputs) {
   Prepare();
   auto inputs_id = compiler_->GetInputs();
   auto outputs_id = compiler_->GetOutputs();
-  executor_->Run(inputs_id, outputs_id, ctx);
+  executor_->Run(inputs_id, inputs, outputs_id, outputs);
 }
 
 void IpuBackend::Prepare() {

diff --git a/paddle/fluid/framework/ipu/ipu_backend.h b/paddle/fluid/framework/ipu/ipu_backend.h
@@ -22,7 +22,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/ipu/ipu_compiler.h"
 #include "paddle/fluid/framework/ipu/ipu_executor.h"
 #include "paddle/fluid/framework/ipu/ipu_strategy.h"
-#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -54,11 +53,15 @@ class IpuBackend {
   void Compile(ir::Graph *graph, const std::vector<std::string> &feed_list,
                const std::vector<std::string> &fetch_list);
 
+  // TODO(doc): called at the start of Run() and also directly by the IPU runtime kernel.
+  void Prepare();
+
   // what run does include:
   //   1. construct forward onnx graph
   //   2. graph-level optimization
   //   3. autodiff
-  void Run(const framework::ExecutionContext &ctx);
+  void Run(const std::vector<const Tensor *> &inputs,
+           const std::vector<Tensor *> &outputs);
 
   Executor &GetExecutor() { return *executor_; }
 
@@ -75,7 +78,6 @@ class IpuBackend {
 
  private:
   int UpperIpuNum();
-  void Prepare();
 
  private:
   std::shared_ptr<Compiler> compiler_;

diff --git a/paddle/fluid/framework/ipu/ipu_compiler.cc b/paddle/fluid/framework/ipu/ipu_compiler.cc
@@ -367,6 +367,15 @@ std::vector<int64_t> Compiler::GetTensorShape(const std::string& name) {
   return builder_->getTensorShape(tensors_[name]);
 }
 
+std::map<std::string, std::string> Compiler::GetOutputTensors() {  // builds a fetch name -> tensor id map for every entry of fetch_list_
+  std::map<std::string, std::string> outputs;
+  for (const auto& fetch_name : fetch_list_) {
+    auto tensorid = tensors_[fetch_name];  // NOTE(review): if tensors_ is a std::map, operator[] default-inserts an entry for an unknown name — confirm
+    outputs[fetch_name] = tensorid;
+  }
+  return outputs;  // returned by value; callers get their own copy
+}
+
 std::vector<popart::TensorId>& Compiler::GetWeights() { return weights_; }
 
 std::string Compiler::GetModelProto() { return builder_->getModelProto(); }

diff --git a/paddle/fluid/framework/ipu/ipu_compiler.h b/paddle/fluid/framework/ipu/ipu_compiler.h
@@ -47,6 +47,7 @@ class Compiler {
   std::vector<popart::TensorId> GetOutputs() { return outputs_; }
   std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
   std::vector<int64_t> GetTensorShape(const std::string &name);
+  std::map<std::string, std::string> GetOutputTensors();
   std::vector<popart::TensorId> &GetWeights();
 
   std::string GetModelProto();

diff --git a/paddle/fluid/framework/ipu/ipu_executor.cc b/paddle/fluid/framework/ipu/ipu_executor.cc
@@ -81,11 +81,9 @@ void Executor::Prepare(const std::string &proto,
 }
 
 void Executor::Run(const std::vector<popart::TensorId> &inputs_id,
+                   const std::vector<const Tensor *> &inputs,
                    const std::vector<popart::TensorId> &outputs_id,
-                   const framework::ExecutionContext &ctx) {
-  auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
-  auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
-  // inputs
+                   const std::vector<Tensor *> &outputs) {
   std::map<popart::TensorId, popart::IArray &> popart_inputs;
   std::map<popart::TensorId, PaddleIArray> input_wrappers;
   for (size_t i = 0; i < inputs.size(); i++) {
@@ -94,23 +92,12 @@ void Executor::Run(const std::vector<popart::TensorId> &inputs_id,
     input_wrappers.emplace(tensor_id, PaddleIArray(tensor));
     popart_inputs.emplace(tensor_id, input_wrappers.at(tensor_id));
   }
-  // anchors
+
   std::map<popart::TensorId, popart::IArray &> popart_anchors;
   std::map<popart::TensorId, PaddleIArray> anchor_wrappers;
   for (size_t i = 0; i < outputs.size(); i++) {
     auto tensor_id = outputs_id[i];
     auto tensor = const_cast<Tensor *>(outputs[i]);
-    // get dims & dtype from session
-    auto fetch_info = session_->getInfo(tensor_id);
-    auto output_shape = fetch_info.shape();
-    if (ipu_strategy_->batches_per_step > 1) {
-      output_shape.insert(output_shape.begin(),
-                          ipu_strategy_->batches_per_step);
-    }
-    tensor->Resize(framework::make_ddim(output_shape));
-    auto fetch_dtype = fetch_info.dataType();
-    auto paddle_type = PopartType2VarType(fetch_dtype);
-    tensor->mutable_data(ctx.GetPlace(), paddle_type);
     anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor));
     popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id));
   }
@@ -204,6 +191,21 @@ void Executor::SetIpuStrategy(const IpuStrategy &strategy) {
   ipu_strategy_ = &strategy;
 }
 
+void Executor::SetOutputTensorId(  // stores the map that GetOutputShape() later indexes by fetch name
+    const std::map<std::string, std::string> &outputs) {
+  outputs_ = outputs;  // copies the whole map into the member, replacing any previous contents
+}
+
+std::vector<int64_t> Executor::GetOutputShape(const std::string &fetch_name) {  // shape for fetch_name as reported by session_->getInfo()
+  auto tensor_id = outputs_[fetch_name];  // NOTE(review): operator[] inserts an empty id when fetch_name was never registered
+  auto fetch_info = session_->getInfo(tensor_id);  // dereferences session_ unchecked; presumably it must be created first — TODO confirm
+  auto output_shape = fetch_info.shape();
+  if (ipu_strategy_->batches_per_step > 1) {  // ipu_strategy_ is initialised to nullptr; dereferenced here without a check
+    output_shape.insert(output_shape.begin(), ipu_strategy_->batches_per_step);  // prepend batches_per_step as the leading dimension
+  }
+  return output_shape;
+}
+
 float Executor::GetLRFromScope() {
   auto lr_var = scope_->GetVar(opt_info.GetLRVarName());
   auto tensor = lr_var->Get<framework::LoDTensor>();

diff --git a/paddle/fluid/framework/ipu/ipu_executor.h b/paddle/fluid/framework/ipu/ipu_executor.h
@@ -22,7 +22,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/ipu/ipu_optimizer.h"
 #include "paddle/fluid/framework/ipu/ipu_strategy.h"
 #include "paddle/fluid/framework/ipu/ipu_utils.h"
-#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 
 namespace paddle {
@@ -39,8 +38,9 @@ class Executor {
                const std::vector<popart::TensorId> &outputs,
                std::shared_ptr<popart::DeviceInfo> device);
   void Run(const std::vector<popart::TensorId> &inputs_id,
+           const std::vector<const Tensor *> &inputs,
            const std::vector<popart::TensorId> &outputs_id,
-           const framework::ExecutionContext &ctx);
+           const std::vector<Tensor *> &outputs);
 
   // Optimizer
   void SetOptimizerType(const std::string &type);
@@ -61,6 +61,10 @@ class Executor {
   // Strategy
   void SetIpuStrategy(const IpuStrategy &strategy);
 
+  // Outputs
+  void SetOutputTensorId(const std::map<std::string, std::string> &outputs);
+  std::vector<int64_t> GetOutputShape(const std::string &fetch_name);
+
  private:
   float GetLRFromScope();
 
@@ -73,6 +77,7 @@ class Executor {
   const IpuStrategy *ipu_strategy_ = nullptr;
   popart::WeightsIO weights_io_;
   std::vector<popart::TensorId> weights_;
+  std::map<std::string, std::string> outputs_;
 };
 
 }  // namespace ipu

diff --git a/paddle/fluid/framework/ipu/ipu_utils.cc b/paddle/fluid/framework/ipu/ipu_utils.cc
@@ -37,7 +37,7 @@ std::size_t PaddleIArray::nelms() const {
 
 const popart::Shape PaddleIArray::shape() const { return shape_; }
 
-popart::DataType VarType2PopartType(const proto::VarType::Type type) {
+popart::DataType VarType2PopartType(proto::VarType::Type type) {
   switch (type) {
     case proto::VarType::UINT8:
       return popart::DataType::UINT8;
@@ -69,39 +69,7 @@ popart::DataType VarType2PopartType(const proto::VarType::Type type) {
   }
 }
 
-proto::VarType::Type PopartType2VarType(const popart::DataType type) {
-  switch (type) {
-    case popart::DataType::UINT8:
-      return proto::VarType::UINT8;
-    case popart::DataType::INT8:
-      return proto::VarType::INT8;
-    case popart::DataType::INT16:
-      return proto::VarType::INT16;
-    case popart::DataType::INT32:
-      return proto::VarType::INT32;
-    case popart::DataType::INT64:
-      return proto::VarType::INT64;
-    case popart::DataType::BOOL:
-      return proto::VarType::BOOL;
-    case popart::DataType::DOUBLE:
-      return proto::VarType::FP64;
-    case popart::DataType::FLOAT:
-      return proto::VarType::FP32;
-    case popart::DataType::FLOAT16:
-      return proto::VarType::FP16;
-    case popart::DataType::BFLOAT16:
-      return proto::VarType::BF16;
-    case popart::DataType::COMPLEX64:
-      return proto::VarType::COMPLEX64;
-    case popart::DataType::COMPLEX128:
-      return proto::VarType::COMPLEX128;
-    default:
-      PADDLE_THROW(paddle::platform::errors::Unavailable(
-          "Unsupported Paddle var type."));
-  }
-}
-
-popart::DataType OnnxDtype2PopartType(const int type) {
+popart::DataType OnnxDtype2PopartType(int type) {
   auto dtype = static_cast<ONNXDataType>(type);
   switch (dtype) {
     case ONNXDataType::BOOL:

diff --git a/paddle/fluid/framework/ipu/ipu_utils.h b/paddle/fluid/framework/ipu/ipu_utils.h
@@ -69,9 +69,8 @@ class PaddleIArray final : public popart::IArray {
   std::vector<int64_t> shape_;
 };
 
-popart::DataType VarType2PopartType(const proto::VarType::Type type);
-proto::VarType::Type PopartType2VarType(const popart::DataType type);
-popart::DataType OnnxDtype2PopartType(const int type);
+popart::DataType VarType2PopartType(proto::VarType::Type type);
+popart::DataType OnnxDtype2PopartType(int type);
 bool GetBoolEnv(std::string str);
 
 template <typename T>

diff --git a/paddle/fluid/operators/ipu_runtime_op.h b/paddle/fluid/operators/ipu_runtime_op.h
@@ -37,13 +37,23 @@ class IpuRuntimeKernel : public framework::OpKernel<T> {
               ctx.device_context());
       ipu_backend->AttachDevice(ipu_ctx.DeviceId());
     }
-
+    VLOG(4) << "IpuBackend prepare session";
+    ipu_backend->Prepare();
     VLOG(4) << "IpuRuntime Kernel, begin to run graph";
-    ipu_backend->Run(ctx);
-
-    // post-run
+    auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
     auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
     auto output_names = ctx.OutputNames("FetchList");
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      auto* out = outputs[i];
+      auto oshape = ipu_backend->GetExecutor().GetOutputShape(output_names[i]);
+      out->Resize(framework::make_ddim(oshape));
+      // TODO(alleng) support multi-output dtypes
+      // maybe get dtype from ipu_backend
+      out->mutable_data<T>(ctx.GetPlace());
+    }
+
+    ipu_backend->Run(inputs, outputs);
+
     // resize tensor when tensor.dims() is empty
     for (size_t i = 0; i < outputs.size(); ++i) {
       auto* out = outputs[i];

diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt
@@ -23,8 +23,7 @@ endif()
 
 cc_library(jit_kernel_helper INTERFACE SRCS ${jit_kernel_cc_srcs} DEPS jit_kernel_base ${JIT_KERNEL_DEPS})
 cc_test(jit_kernel_test SRCS test.cc DEPS jit_kernel_helper)
-# TODO(alleng) fix error when WITH_IPU
-if(NOT WIN32 AND NOT WITH_IPU)
+if(NOT WIN32)
     cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper device_tracer tensor)
 endif()
 if(WITH_TESTING AND TEST jit_kernel_test)