Skip to content

Commit

Permalink
fix ut
Browse files Browse the repository at this point in the history
  • Loading branch information
Superjomn committed May 31, 2018
1 parent 96fe6a1 commit 5871e04
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 16 deletions.
45 changes: 38 additions & 7 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
Expand All @@ -22,6 +23,29 @@ namespace paddle {
namespace inference {
namespace tensorrt {

// Copy a 2-D array between two memory layouts.
//
// Element (h, w) is read from idata at offset h*istrides.h() + w*istrides.w()
// and written to odata at offset h*ostrides.h() + w*ostrides.w(). With a
// transposed stride pair (as used by ReorderCKtoKC below) this performs a
// matrix transpose.
//
// @param shape    logical extent of the copy: shape.h() rows, shape.w() cols.
// @param idata    source buffer, addressed with istrides.
// @param istrides strides of the source layout.
// @param odata    destination buffer, addressed with ostrides.
// @param ostrides strides of the destination layout.
template <typename T>
void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
              T* odata, nvinfer1::DimsHW ostrides) {
  for (int h = 0; h < shape.h(); ++h) {
    for (int w = 0; w < shape.w(); ++w) {
      // Fix: the source must be indexed with istrides, not ostrides.
      // Indexing both sides with ostrides made the copy an identity for any
      // pair of layouts, so the CK->KC weight transpose never happened.
      odata[h * ostrides.h() + w * ostrides.w()] =
          idata[h * istrides.h() + w * istrides.w()];
    }
  }
}

// Transpose FC weights from fluid's row-major (C, K) layout into the
// row-major (K, C) layout expected by TensorRT, writing into *oweights.
void ReorderCKtoKC(TensorRTEngine::Weight& iweights,
                   TensorRTEngine::Weight* oweights) {
  const int c = iweights.dims[0];
  const int k = iweights.dims[1];
  oweights->dims.assign({k, c});
  // Source element (row, col) of the (c, k) matrix lives at row + col * k
  // when walked in (k, c) order; destination is plain row-major (k, c).
  const nvinfer1::DimsHW src_strides = {1, k};
  const nvinfer1::DimsHW dst_strides = {c, 1};
  const auto* src = static_cast<float const*>(iweights.get().values);
  auto* dst = static_cast<float*>(const_cast<void*>(oweights->get().values));
  Reorder2({k, c}, src, src_strides, dst, dst_strides);
}

/*
* FC converter convert a MUL op in Fluid to a FC layer in TRT.
*/
Expand Down Expand Up @@ -49,28 +73,35 @@ class FcOpConverter : public OpConverter {
PADDLE_ENFORCE_EQ(Y_t->dims().size(), 2UL); // a matrix
// Output width of the FC layer = number of columns of the (C, K) weight.
size_t n_output = Y_t->dims()[1];

// Stage the weight in a CPU-side temporary so it can be transposed without
// clobbering the original parameter tensor.
framework::LoDTensor tmp;
tmp.Resize(Y_t->dims());
// NOTE(review): memcpy takes a length in BYTES, but dims()[0] * dims()[1] is
// an element count — this copies only 1/sizeof(float) of the weight. Looks
// like it should be `Y_t->dims()[0] * Y_t->dims()[1] * sizeof(float)` (or
// simply Y_t->memory_size()) — confirm and fix.
memcpy(tmp.mutable_data<float>(platform::CPUPlace()), Y_t->data<float>(),
Y_t->dims()[0] * Y_t->dims()[1]);

// `weight` wraps the original parameter buffer (weight_data is declared in
// the hidden portion of this function — presumably Y_t's data; verify),
// `tmp_weight` wraps the staged copy made above.
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
Y_t->memory_size() / sizeof(float)};
TensorRTEngine::Weight tmp_weight(nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp.data<float>()),
Y_t->memory_size() / sizeof(float));
weight.dims.assign({Y_t->dims()[0], Y_t->dims()[1]});
tmp_weight.dims = weight.dims;

// NOTE(review): `transposed` is assigned but never used afterwards — dead
// local, candidate for removal.
TensorRTEngine::Weight transposed = weight;
// Transpose the staged copy (C, K) -> (K, C) back into `weight`, which is
// what gets handed to the TRT FullyConnected layer below.
ReorderCKtoKC(tmp_weight, &weight);

// Currently, the framework can only handle one fluid op -> one TRT layer,
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
// handle `mul`, leave `add` as another layer.
// DEBUG
// Empty bias: the FC layer performs only the matmul part of fluid's fc.
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};

auto* layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected,
*const_cast<nvinfer1::ITensor*>(X),
n_output, weight.get(), bias.get());

// Register the layer's first output under the fluid op's output name so the
// engine can locate it later.
auto output_name = op_desc.Output("Out").front();
engine_->DeclareOutput(layer, 0, output_name);
auto* output = engine_->GetITensor(output_name);
// NOTE(review): leftover debug logging of the output dims — consider
// removing or demoting to VLOG before merging.
LOG(INFO) << "output dim";
for (int i = 0; i < output->getDimensions().nbDims; i++) {
LOG(INFO) << output->getDimensions().d[i];
}
}
};

Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ TEST(fc_op, test) {
framework::Scope scope;
TRTConvertValidation validator(20, parameters, scope, 1000);

validator.DeclInputVar("mul-X", nvinfer1::Dims4(12, 6, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(6, 3));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(12, 3));
validator.DeclInputVar("mul-X", nvinfer1::Dims4(8, 3, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(3, 2));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(8, 2));

// Prepare Op description
framework::OpDesc desc;
Expand Down
6 changes: 0 additions & 6 deletions paddle/fluid/inference/tensorrt/convert/ut_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,6 @@ class TRTConvertValidation {
auto* var = scope_.FindVar(input);
PADDLE_ENFORCE(var);
auto tensor = var->GetMutable<framework::LoDTensor>();
LOG(INFO) << "set input for TRT " << input;
LOG(INFO) << tensor->data<float>()[0];
LOG(INFO) << tensor->data<float>()[1];
LOG(INFO) << "set data size " << analysis::AccuDims(tensor->dims(), tensor->dims().size());

engine_->SetInputFromCPU(
input, static_cast<void*>(tensor->data<void>()),
Expand All @@ -140,7 +136,6 @@ class TRTConvertValidation {
for (const auto& output : op_desc_->OutputArgumentNames()) {
std::vector<float> fluid_out;
std::vector<float> trt_out(200, 2008.);
LOG(INFO) << "get TRT output " << output;
engine_->GetOutputInCPU(output, &trt_out[0], 200 * sizeof(float));
cudaStreamSynchronize(*engine_->stream());

Expand All @@ -150,7 +145,6 @@ class TRTConvertValidation {
// Compare two output
ASSERT_FALSE(fluid_out.empty());
for (size_t i = 0; i < fluid_out.size(); i++) {
LOG(INFO) << fluid_out[i] << " " << trt_out[i];
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 1e-6);
}
}
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/inference/tensorrt/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class TensorRTEngine : public EngineBase {
}
const nvinfer1::Weights& get() { return w_; }

std::vector<int64_t> dims;

private:
nvinfer1::Weights w_;
};
Expand Down

0 comments on commit 5871e04

Please sign in to comment.