Skip to content

Commit

Permalink
3D max subsampling layer implementation added to CUDA backend
Browse files Browse the repository at this point in the history
  • Loading branch information
milakov committed Sep 21, 2013
1 parent 55c2eda commit a89e80f
Show file tree
Hide file tree
Showing 14 changed files with 1,280 additions and 49 deletions.
14 changes: 2 additions & 12 deletions nnforge/cuda/max_subsampling_2d_layer_hessian_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,6 @@ struct __align__(4) y_feature_map_config
unsigned int y_feature_map_id_pair;
};

struct __align__(4) window_x_window_y_config
{
window_x_window_y_config(int window_x, int window_y)
{
this->window_x_window_y_pair = (((unsigned int)window_x) << 16) | (unsigned int)window_y;
}

unsigned int window_x_window_y_pair;
};

struct __align__(4) x_y_config
{
x_y_config(int x, int y)
Expand Down Expand Up @@ -238,7 +228,7 @@ namespace nnforge
subsampling_sizes[0]);

int threadblock_size = kernel_dims.second.x * kernel_dims.second.y * kernel_dims.second.z;
int smem_size = threadblock_size * (sizeof(float) + sizeof(window_x_window_y_config));
int smem_size = threadblock_size * (sizeof(float) + sizeof(int));

max_subsampling_2d_tex_hess_kernel<<<kernel_dims.first, kernel_dims.second, smem_size, stream_id>>>(
output,
Expand Down Expand Up @@ -320,7 +310,7 @@ namespace nnforge
{
std::vector<size_t> res;

res.push_back(output_elem_count_per_entry * sizeof(float));
res.push_back(output_elem_count_per_entry * sizeof(x_y_config));

return res;
}
Expand Down
34 changes: 6 additions & 28 deletions nnforge/cuda/max_subsampling_2d_layer_updater_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#include "neural_network_cuda_exception.h"
#include "../max_subsampling_layer.h"

texture<float, cudaTextureType1D, cudaReadModeElementType> input_tex_ref;

struct __align__(4) window_x_x_config
{
window_x_x_config(int window_x, int x)
Expand All @@ -44,16 +42,6 @@ struct __align__(4) y_feature_map_config
unsigned int y_feature_map_id_pair;
};

struct __align__(4) window_x_window_y_config
{
window_x_window_y_config(int window_x, int window_y)
{
this->window_x_window_y_pair = (((unsigned int)window_x) << 16) | (unsigned int)window_y;
}

unsigned int window_x_window_y_pair;
};

struct __align__(4) x_y_config
{
x_y_config(int x, int y)
Expand All @@ -69,6 +57,7 @@ extern __shared__ float arr_sh[];
__global__ void max_subsampling_2d_tex_upd_kernel(
float * __restrict output,
x_y_config * __restrict max_positions,
const float * __restrict input,
const window_x_x_config * __restrict window_x_x_config_list,
const y_feature_map_config * __restrict y_feature_map_config_list,
int subsampling_width,
Expand Down Expand Up @@ -115,12 +104,12 @@ __global__ void max_subsampling_2d_tex_upd_kernel(

int current_input_elem_id = ((entry_id * feature_map_count + feature_map_id) * input_height + input_y) * input_width + input_x;

res = tex1Dfetch(input_tex_ref, current_input_elem_id);
res = input[current_input_elem_id];
max_pos_y = 0;
for(int j = 1; j < subsampling_height; ++j)
{
current_input_elem_id += input_width;
float new_val = tex1Dfetch(input_tex_ref, current_input_elem_id);
float new_val = input[current_input_elem_id];
if (new_val > res)
{
res = new_val;
Expand Down Expand Up @@ -206,8 +195,6 @@ namespace nnforge
{
max_subsampling_2d_layer_updater_cuda::max_subsampling_2d_layer_updater_cuda()
{
input_tex_ref.addressMode[0] = cudaAddressModeBorder;
input_tex_ref.normalized = false;
}

max_subsampling_2d_layer_updater_cuda::~max_subsampling_2d_layer_updater_cuda()
Expand All @@ -225,8 +212,7 @@ namespace nnforge
std::vector<cuda_memobject_smart_ptr>& dynamic_memobjects,
unsigned int entry_count)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cuda_safe_call(cudaBindTexture(0, input_tex_ref, *input_neurons_buffer, desc, input_elem_count_per_entry * entry_count * sizeof(float)));
const float * input = *input_neurons_buffer;

float * output = *output_neurons_buffer;
x_y_config * max_positions = (x_y_config *)((void *)(*additional_buffers[0]));
Expand All @@ -245,11 +231,12 @@ namespace nnforge
subsampling_sizes[0]);

int threadblock_size = kernel_dims.second.x * kernel_dims.second.y * kernel_dims.second.z;
int smem_size = threadblock_size * (sizeof(float) + sizeof(window_x_window_y_config));
int smem_size = threadblock_size * (sizeof(float) + sizeof(int));

max_subsampling_2d_tex_upd_kernel<<<kernel_dims.first, kernel_dims.second, smem_size, stream_id>>>(
output,
max_positions,
input,
window_x_x_config_list,
y_feature_map_config_list,
subsampling_sizes[0],
Expand Down Expand Up @@ -312,15 +299,6 @@ namespace nnforge
x_y_config_count);
}

std::vector<unsigned int> max_subsampling_2d_layer_updater_cuda::get_linear_addressing_through_texture_per_entry() const
{
std::vector<unsigned int> res;

res.push_back(input_elem_count_per_entry);

return res;
}

void max_subsampling_2d_layer_updater_cuda::updater_configured()
{
if (!different_input)
Expand Down
2 changes: 0 additions & 2 deletions nnforge/cuda/max_subsampling_2d_layer_updater_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ namespace nnforge

virtual bool is_in_place_backprop() const;

virtual std::vector<unsigned int> get_linear_addressing_through_texture_per_entry() const;

virtual std::vector<size_t> get_sizes_of_additional_buffers_per_entry() const;

virtual std::vector<size_t> get_sizes_of_additional_buffers_fixed() const;
Expand Down
Loading

0 comments on commit a89e80f

Please sign in to comment.