Skip to content

Commit

Permalink
fix ut
Browse files Browse the repository at this point in the history
  • Loading branch information
Superjomn committed May 31, 2018
1 parent 96fe6a1 commit 5871e04
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 16 deletions.
45 changes: 38 additions & 7 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
Expand All @@ -22,6 +23,29 @@ namespace paddle {
namespace inference {
namespace tensorrt {

// Copy a 2-D array between two memory layouts.
//
// Element (h, w) is read from idata at offset h*istrides.h() + w*istrides.w()
// and written to odata at offset h*ostrides.h() + w*ostrides.w(). With a
// transposed stride pair (as used by ReorderCKtoKC below) this performs a
// matrix transpose.
//
// @param shape    logical extent of the copy: shape.h() rows, shape.w() cols.
// @param idata    source buffer, addressed with istrides.
// @param istrides strides of the source layout.
// @param odata    destination buffer, addressed with ostrides.
// @param ostrides strides of the destination layout.
template <typename T>
void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
              T* odata, nvinfer1::DimsHW ostrides) {
  for (int h = 0; h < shape.h(); ++h) {
    for (int w = 0; w < shape.w(); ++w) {
      // Fix: the source must be indexed with istrides, not ostrides.
      // Indexing both sides with ostrides made the copy an identity for any
      // pair of layouts, so the CK->KC weight transpose never happened.
      odata[h * ostrides.h() + w * ostrides.w()] =
          idata[h * istrides.h() + w * istrides.w()];
    }
  }
}

// Transpose FC weights from fluid's row-major (C, K) layout into the
// row-major (K, C) layout expected by TensorRT, writing into *oweights.
void ReorderCKtoKC(TensorRTEngine::Weight& iweights,
                   TensorRTEngine::Weight* oweights) {
  const int c = iweights.dims[0];
  const int k = iweights.dims[1];
  oweights->dims.assign({k, c});
  // Source element (row, col) of the (c, k) matrix lives at row + col * k
  // when walked in (k, c) order; destination is plain row-major (k, c).
  const nvinfer1::DimsHW src_strides = {1, k};
  const nvinfer1::DimsHW dst_strides = {c, 1};
  const auto* src = static_cast<float const*>(iweights.get().values);
  auto* dst = static_cast<float*>(const_cast<void*>(oweights->get().values));
  Reorder2({k, c}, src, src_strides, dst, dst_strides);
}

/*
* FC converter convert a MUL op in Fluid to a FC layer in TRT.
*/
Expand Down Expand Up @@ -49,28 +73,35 @@ class FcOpConverter : public OpConverter {
PADDLE_ENFORCE_EQ(Y_t->dims().size(), 2UL); // a matrix
// Output width of the FC layer = number of columns of the (C, K) weight.
size_t n_output = Y_t->dims()[1];

// Stage the weight in a CPU-side temporary so it can be transposed without
// clobbering the original parameter tensor.
framework::LoDTensor tmp;
tmp.Resize(Y_t->dims());
// NOTE(review): memcpy takes a length in BYTES, but dims()[0] * dims()[1] is
// an element count — this copies only 1/sizeof(float) of the weight. Looks
// like it should be `Y_t->dims()[0] * Y_t->dims()[1] * sizeof(float)` (or
// simply Y_t->memory_size()) — confirm and fix.
memcpy(tmp.mutable_data<float>(platform::CPUPlace()), Y_t->data<float>(),
Y_t->dims()[0] * Y_t->dims()[1]);

// `weight` wraps the original parameter buffer (weight_data is declared in
// the hidden portion of this function — presumably Y_t's data; verify),
// `tmp_weight` wraps the staged copy made above.
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
Y_t->memory_size() / sizeof(float)};
TensorRTEngine::Weight tmp_weight(nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp.data<float>()),
Y_t->memory_size() / sizeof(float));
weight.dims.assign({Y_t->dims()[0], Y_t->dims()[1]});
tmp_weight.dims = weight.dims;

// NOTE(review): `transposed` is assigned but never used afterwards — dead
// local, candidate for removal.
TensorRTEngine::Weight transposed = weight;
// Transpose the staged copy (C, K) -> (K, C) back into `weight`, which is
// what gets handed to the TRT FullyConnected layer below.
ReorderCKtoKC(tmp_weight, &weight);

// Currently, the framework can only handle one fluid op -> one TRT layer,
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
// handle `mul`, leave `add` as another layer.
// DEBUG
// Empty bias: the FC layer performs only the matmul part of fluid's fc.
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};

auto* layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected,
*const_cast<nvinfer1::ITensor*>(X),
n_output, weight.get(), bias.get());

// Register the layer's first output under the fluid op's output name so the
// engine can locate it later.
auto output_name = op_desc.Output("Out").front();
engine_->DeclareOutput(layer, 0, output_name);
auto* output = engine_->GetITensor(output_name);
// NOTE(review): leftover debug logging of the output dims — consider
// removing or demoting to VLOG before merging.
LOG(INFO) << "output dim";
for (int i = 0; i < output->getDimensions().nbDims; i++) {
LOG(INFO) << output->getDimensions().d[i];
}
}
};

Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ TEST(fc_op, test) {
framework::Scope scope;
TRTConvertValidation validator(20, parameters, scope, 1000);

validator.DeclInputVar("mul-X", nvinfer1::Dims4(12, 6, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(6, 3));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(12, 3));
validator.DeclInputVar("mul-X", nvinfer1::Dims4(8, 3, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(3, 2));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(8, 2));

// Prepare Op description
framework::OpDesc desc;
Expand Down
6 changes: 0 additions & 6 deletions paddle/fluid/inference/tensorrt/convert/ut_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,6 @@ class TRTConvertValidation {
auto* var = scope_.FindVar(input);
PADDLE_ENFORCE(var);
auto tensor = var->GetMutable<framework::LoDTensor>();
LOG(INFO) << "set input for TRT " << input;
LOG(INFO) << tensor->data<float>()[0];
LOG(INFO) << tensor->data<float>()[1];
LOG(INFO) << "set data size " << analysis::AccuDims(tensor->dims(), tensor->dims().size());

engine_->SetInputFromCPU(
input, static_cast<void*>(tensor->data<void>()),
Expand All @@ -140,7 +136,6 @@ class TRTConvertValidation {
for (const auto& output : op_desc_->OutputArgumentNames()) {
std::vector<float> fluid_out;
std::vector<float> trt_out(200, 2008.);
LOG(INFO) << "get TRT output " << output;
engine_->GetOutputInCPU(output, &trt_out[0], 200 * sizeof(float));
cudaStreamSynchronize(*engine_->stream());

Expand All @@ -150,7 +145,6 @@ class TRTConvertValidation {
// Compare two output
ASSERT_FALSE(fluid_out.empty());
for (size_t i = 0; i < fluid_out.size(); i++) {
LOG(INFO) << fluid_out[i] << " " << trt_out[i];
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 1e-6);
}
}
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/inference/tensorrt/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class TensorRTEngine : public EngineBase {
}
const nvinfer1::Weights& get() { return w_; }

std::vector<int64_t> dims;

private:
nvinfer1::Weights w_;
};
Expand Down

0 comments on commit 5871e04

Please sign in to comment.