Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cuDNN Wrapper. #3791

Merged
merged 6 commits into from
Sep 4, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions paddle/platform/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ ENDIF()
cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)

nv_library(cudnn_helper SRCS cudnn_helper.cc DEPS dynload_cuda)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
15 changes: 15 additions & 0 deletions paddle/platform/cudnn_helper.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/platform/cudnn_helper.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why need cudnn_helper.cc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove cudnn_helper.cc

222 changes: 222 additions & 0 deletions paddle/platform/cudnn_helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#ifndef PADDLE_ONLY_CPU
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not need the PADDLE_ONLY_CPU macro. In this file, there is no code need to separate by this macro.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove #ifndef PADDLE_ONLY_CPU

#include <cudnn.h>
#include "glog/logging.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#include <glog/logging.h>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove #include <glog/logging.h>, it is used to debug at first.

#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/macros.h"

namespace paddle {
namespace platform {

enum class DataLayout {
kNHWC,
kNCHW,
kNCHW_VECT_C,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the cudnn's three-dimensional convolution be taken into account?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The layout for 3D Conv can be added when implementing it.

};

enum class PoolingMode {
kMaximum,
kAverage,
};
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DataLayout and PoolingMode can be removed to other files when writing conv and pooling operators.


template <typename T>
class CudnnDataType;

template <>
class CudnnDataType<float> {
public:
static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

line 45-53 can be written in the template CudnnDataType.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These codes may be not necessary and remove them.

typedef const float ScalingParamType;
static ScalingParamType* kOne() {
static ScalingParamType v = 1.0;
return &v;
}
static const ScalingParamType* kZero() {
static ScalingParamType v = 0.0;
return &v;
}
};

template <>
class CudnnDataType<double> {
public:
static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
typedef const double ScalingParamType;
static ScalingParamType* kOne() {
static ScalingParamType v = 1.0;
return &v;
}
static ScalingParamType* kZero() {
static ScalingParamType v = 0.0;
return &v;
}
};

inline cudnnTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
switch (order) {
case DataLayout::kNHWC:
return CUDNN_TENSOR_NHWC;
case DataLayout::kNCHW:
return CUDNN_TENSOR_NCHW;
default:
PADDLE_THROW("Unknown cudnn equivalent for order");
}
return CUDNN_TENSOR_NCHW;
}

class ScopedTensorDescriptor {
public:
ScopedTensorDescriptor() {
PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&desc_));
}
~ScopedTensorDescriptor() {
PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(desc_));
}

inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
const cudnnDataType_t type,
const std::vector<int>& dims) {
// the format is not used now, but it maybe useful feature
std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1;
for (int i = dims.size() - 2; i >= 0; i--) {
strides[i] = dims[i + 1] * strides[i + 1];
}
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
desc_, type, dims.size(), dims.data(), strides.data()));
return desc_;
}

template <typename T>
inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
const std::vector<int>& dims) {
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
dims);
}

private:
cudnnTensorDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
};

class ScopedFilterDescriptor {
public:
ScopedFilterDescriptor() {
PADDLE_ENFORCE(dynload::cudnnCreateFilterDescriptor(&desc_));
}
~ScopedFilterDescriptor() {
PADDLE_ENFORCE(dynload::cudnnDestroyFilterDescriptor(desc_));
}

inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
const cudnnDataType_t type,
const std::vector<int>& kernel) {
// filter layout: output input spatial_dim_y spatial_dim_x
PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
desc_, type, format, kernel.size(), kernel.data()));
return desc_;
}

template <typename T>
inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
const std::vector<int>& kernel) {
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
kernel);
}

private:
cudnnFilterDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
};

class ScopedConvolutionDescriptor {
public:
ScopedConvolutionDescriptor() {
PADDLE_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&desc_));
}
~ScopedConvolutionDescriptor() {
PADDLE_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(desc_));
}

inline cudnnConvolutionDescriptor_t descriptor(
cudnnDataType_t type, const std::vector<int>& pads,
const std::vector<int>& strides, const std::vector<int>& dilations) {
PADDLE_ENFORCE_EQ(pads.size(), strides.size());
PADDLE_ENFORCE_EQ(pads.size(), dilations.size());

#if CUDNN_VERSION < 6000
// cudnn v5 does not support dilation conv, the argument is called upscale
// instead of dilations and it is must be one.
for (size_t i = 0; i < dilations.size(); ++i) {
PADDLE_ENFORCE_EQ(
dilations[i], 1,
"Dilations conv is not supported in this cuDNN version");
}
#endif

PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
CUDNN_CROSS_CORRELATION, type));
return desc_;
}

template <typename T>
inline cudnnConvolutionDescriptor_t descriptor(
const std::vector<int>& pads, const std::vector<int>& strides,
const std::vector<int>& dilations) {
return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
}

private:
cudnnConvolutionDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
};

class ScopedPoolingDescriptor {
public:
ScopedPoolingDescriptor() {
PADDLE_ENFORCE(dynload::cudnnCreatePoolingDescriptor(&desc_));
}
~ScopedPoolingDescriptor() {
PADDLE_ENFORCE(dynload::cudnnDestroyPoolingDescriptor(desc_));
}

inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
const std::vector<int>& kernel,
const std::vector<int>& pads,
const std::vector<int>& strides) {
PADDLE_ENFORCE_EQ(kernel.size(), pads.size());
PADDLE_ENFORCE_EQ(kernel.size(), strides.size());
PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor(
desc_, (mode == PoolingMode::kMaximum
? CUDNN_POOLING_MAX
: CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING),
CUDNN_PROPAGATE_NAN, // Always propagate nans.
kernel.size(), kernel.data(), pads.data(), strides.data()));
return desc_;
}

private:
cudnnPoolingDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
};

} // namespace platform
} // namespace paddle
#endif
121 changes: 121 additions & 0 deletions paddle/platform/cudnn_helper_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/platform/cudnn_helper.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#include <glog/logging.h>
#include <gtest/gtest.h>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


TEST(CudnnHelper, ScopedTensorDescriptor) {
using paddle::platform::ScopedTensorDescriptor;
using paddle::platform::DataLayout;

ScopedTensorDescriptor tensor_desc;
std::vector<int> shape = {2, 4, 6, 6};
auto desc = tensor_desc.descriptor<float>(DataLayout::kNCHW, shape);

cudnnDataType_t type;
int nd;
std::vector<int> dims(4);
std::vector<int> strides(4);
paddle::platform::dynload::cudnnGetTensorNdDescriptor(
desc, 4, &type, &nd, dims.data(), strides.data());

EXPECT_EQ(nd, 4);
for (size_t i = 0; i < dims.size(); ++i) {
EXPECT_EQ(dims[i], shape[i]);
}
EXPECT_EQ(strides[3], 1);
EXPECT_EQ(strides[2], 6);
EXPECT_EQ(strides[1], 36);
EXPECT_EQ(strides[0], 144);
}

TEST(CudnnHelper, ScopedFilterDescriptor) {
using paddle::platform::ScopedFilterDescriptor;
using paddle::platform::DataLayout;

ScopedFilterDescriptor filter_desc;
std::vector<int> shape = {2, 3, 3};
auto desc = filter_desc.descriptor<float>(DataLayout::kNCHW, shape);

cudnnDataType_t type;
int nd;
cudnnTensorFormat_t format;
std::vector<int> kernel(3);
paddle::platform::dynload::cudnnGetFilterNdDescriptor(desc, 3, &type, &format,
&nd, kernel.data());

EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
EXPECT_EQ(nd, 3);
for (size_t i = 0; i < shape.size(); ++i) {
EXPECT_EQ(kernel[i], shape[i]);
}
}

TEST(CudnnHelper, ScopedConvolutionDescriptor) {
using paddle::platform::ScopedConvolutionDescriptor;

ScopedConvolutionDescriptor conv_desc;
std::vector<int> src_pads = {2, 2, 2};
std::vector<int> src_strides = {1, 1, 1};
std::vector<int> src_dilations = {1, 1, 1};
auto desc = conv_desc.descriptor<float>(src_pads, src_strides, src_dilations);

cudnnDataType_t type;
cudnnConvolutionMode_t mode;
int nd;
std::vector<int> pads(3);
std::vector<int> strides(3);
std::vector<int> dilations(3);
paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(
desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode,
&type);

EXPECT_EQ(nd, 3);
for (size_t i = 0; i < src_pads.size(); ++i) {
EXPECT_EQ(pads[i], src_pads[i]);
EXPECT_EQ(strides[i], src_strides[i]);
EXPECT_EQ(dilations[i], src_dilations[i]);
}
EXPECT_EQ(mode, CUDNN_CROSS_CORRELATION);
}

TEST(CudnnHelper, ScopedPoolingDescriptor) {
using paddle::platform::ScopedPoolingDescriptor;
using paddle::platform::PoolingMode;

ScopedPoolingDescriptor pool_desc;
std::vector<int> src_kernel = {2, 2, 5};
std::vector<int> src_pads = {1, 1, 2};
std::vector<int> src_strides = {2, 2, 3};
auto desc = pool_desc.descriptor(PoolingMode::kMaximum, src_kernel, src_pads,
src_strides);

cudnnPoolingMode_t mode;
cudnnNanPropagation_t nan_t = CUDNN_PROPAGATE_NAN;
int nd;
std::vector<int> kernel(3);
std::vector<int> pads(3);
std::vector<int> strides(3);
paddle::platform::dynload::cudnnGetPoolingNdDescriptor(
desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data());

EXPECT_EQ(nd, 3);
for (size_t i = 0; i < src_pads.size(); ++i) {
EXPECT_EQ(kernel[i], src_kernel[i]);
EXPECT_EQ(pads[i], src_pads[i]);
EXPECT_EQ(strides[i], src_strides[i]);
}
EXPECT_EQ(mode, CUDNN_POOLING_MAX);
}
2 changes: 1 addition & 1 deletion paddle/platform/dynload/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc)
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader)
8 changes: 8 additions & 0 deletions paddle/platform/dynload/cudnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,27 @@ extern void* cudnn_dso_handle;
#define CUDNN_DNN_ROUTINE_EACH(__macro) \
__macro(cudnnSetTensor4dDescriptor); \
__macro(cudnnSetTensor4dDescriptorEx); \
__macro(cudnnSetTensorNdDescriptor); \
__macro(cudnnGetTensorNdDescriptor); \
__macro(cudnnGetConvolutionNdForwardOutputDim); \
__macro(cudnnGetConvolutionForwardAlgorithm); \
__macro(cudnnCreateTensorDescriptor); \
__macro(cudnnDestroyTensorDescriptor); \
__macro(cudnnCreateFilterDescriptor); \
__macro(cudnnSetFilter4dDescriptor); \
__macro(cudnnSetFilterNdDescriptor); \
__macro(cudnnGetFilterNdDescriptor); \
__macro(cudnnSetPooling2dDescriptor); \
__macro(cudnnSetPoolingNdDescriptor); \
__macro(cudnnGetPoolingNdDescriptor); \
__macro(cudnnDestroyFilterDescriptor); \
__macro(cudnnCreateConvolutionDescriptor); \
__macro(cudnnCreatePoolingDescriptor); \
__macro(cudnnDestroyPoolingDescriptor); \
__macro(cudnnSetConvolution2dDescriptor); \
__macro(cudnnDestroyConvolutionDescriptor); \
__macro(cudnnSetConvolutionNdDescriptor); \
__macro(cudnnGetConvolutionNdDescriptor); \
__macro(cudnnCreate); \
__macro(cudnnDestroy); \
__macro(cudnnSetStream); \
Expand Down
Loading