diff --git a/nnforge/cuda/cuda.cpp b/nnforge/cuda/cuda.cpp index a1087c8..641775f 100644 --- a/nnforge/cuda/cuda.cpp +++ b/nnforge/cuda/cuda.cpp @@ -17,6 +17,7 @@ #include "cuda.h" #include "../nnforge.h" + #include "layer_testing_schema_factory.h" #include "local_contrast_subtractive_layer_testing_schema.h" #include "absolute_layer_testing_schema.h" @@ -31,6 +32,7 @@ #include "maxout_layer_testing_schema.h" #include "layer_hessian_schema_factory.h" +#include "local_contrast_subtractive_layer_hessian_schema.h" #include "absolute_layer_hessian_schema.h" #include "hyperbolic_tangent_layer_hessian_schema.h" #include "average_subsampling_layer_hessian_schema.h" @@ -42,6 +44,7 @@ #include "maxout_layer_hessian_schema.h" #include "layer_updater_schema_factory.h" +#include "local_contrast_subtractive_layer_updater_schema.h" #include "absolute_layer_updater_schema.h" #include "hyperbolic_tangent_layer_updater_schema.h" #include "average_subsampling_layer_updater_schema.h" @@ -75,6 +78,7 @@ namespace nnforge single_layer_testing_schema_factory::get_mutable_instance().register_layer_testing_schema(layer_testing_schema_smart_ptr(new rgb_to_yuv_convert_layer_testing_schema())); single_layer_testing_schema_factory::get_mutable_instance().register_layer_testing_schema(layer_testing_schema_smart_ptr(new maxout_layer_testing_schema())); + single_layer_hessian_schema_factory::get_mutable_instance().register_layer_hessian_schema(layer_hessian_schema_smart_ptr(new local_contrast_subtractive_layer_hessian_schema())); single_layer_hessian_schema_factory::get_mutable_instance().register_layer_hessian_schema(layer_hessian_schema_smart_ptr(new absolute_layer_hessian_schema())); single_layer_hessian_schema_factory::get_mutable_instance().register_layer_hessian_schema(layer_hessian_schema_smart_ptr(new hyperbolic_tangent_layer_hessian_schema())); single_layer_hessian_schema_factory::get_mutable_instance().register_layer_hessian_schema(layer_hessian_schema_smart_ptr(new average_subsampling_layer_hessian_schema())); @@ -85,6 +89,7 @@ namespace nnforge single_layer_hessian_schema_factory::get_mutable_instance().register_layer_hessian_schema(layer_hessian_schema_smart_ptr(new softmax_layer_hessian_schema())); single_layer_hessian_schema_factory::get_mutable_instance().register_layer_hessian_schema(layer_hessian_schema_smart_ptr(new maxout_layer_hessian_schema())); + single_layer_updater_schema_factory::get_mutable_instance().register_layer_updater_schema(layer_updater_schema_smart_ptr(new local_contrast_subtractive_layer_updater_schema())); single_layer_updater_schema_factory::get_mutable_instance().register_layer_updater_schema(layer_updater_schema_smart_ptr(new absolute_layer_updater_schema())); single_layer_updater_schema_factory::get_mutable_instance().register_layer_updater_schema(layer_updater_schema_smart_ptr(new hyperbolic_tangent_layer_updater_schema())); single_layer_updater_schema_factory::get_mutable_instance().register_layer_updater_schema(layer_updater_schema_smart_ptr(new average_subsampling_layer_updater_schema())); diff --git a/nnforge/cuda/local_contrast_subtractive_2d_layer_hessian_cuda.cu b/nnforge/cuda/local_contrast_subtractive_2d_layer_hessian_cuda.cu new file mode 100644 index 0000000..1d64cb1 --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_2d_layer_hessian_cuda.cu @@ -0,0 +1,615 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "local_contrast_subtractive_2d_layer_hessian_cuda.h" + +#include "../local_contrast_subtractive_layer.h" + +#include "util_cuda.h" + +__global__ void local_contrast_subtractive_2d_blur_horizontal_hess_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int window_width, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)(((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll 4 + for(int i = 1; i < window_width; ++i) + { + current_weights++; + if (i < x + 1) + current_input_low--; + if (i > x + 1) + current_input_low++; + if (i < width - x) + current_input_high++; + if (i > width - x) + current_input_high--; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + output[(z * height + y) * width + x] = res; + } +} + +template +__global__ void local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)(((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll + for(int i = 1; i < WINDOW_WIDTH; ++i) + { + current_weights++; + if (i < x + 1) + current_input_low--; + if (i > x + 1) + current_input_low++; + if (i < width - x) + current_input_high++; + if (i > width - x) + current_input_high--; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + output[(z * height + y) * width + x] = res; + } +} + +__global__ void local_contrast_subtractive_2d_blur_vertical_and_subtract_hess_kernel( + const float * __restrict input, + const float * __restrict original_input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int window_height, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll 4 + for(int i = 1; i < window_height; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + int offset = ((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x; + output[offset] = original_input[offset] - res; + } +} + +template +__global__ void local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel( + const float * __restrict input, + const float * __restrict original_input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll + for(int i = 1; i < WINDOW_HEIGHT; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + int offset = ((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x; + output[offset] = original_input[offset] - res; + } +} + +__global__ void local_contrast_subtractive_2d_copy_unaffected_hess_kernel( + const float * __restrict original_input, + float * __restrict output, + const unsigned int * __restrict unaffected_feature_map_list, + int input_feature_map_count, + int unaffected_feature_map_count, + int elem_count_per_fature_map, + int entry_count) +{ + int elem_id = blockIdx.x * blockDim.x + threadIdx.x; + int unaffected_feature_map_index = blockIdx.y * blockDim.y + threadIdx.y; + int entry_id = blockIdx.z * blockDim.z + threadIdx.z; + bool in_bounds = (elem_id < elem_count_per_fature_map) && (unaffected_feature_map_index < unaffected_feature_map_count) && (entry_id < entry_count); + if (in_bounds) + { + int unaffected_feature_map_id = unaffected_feature_map_list[unaffected_feature_map_index]; + int offset = (entry_id * input_feature_map_count + unaffected_feature_map_id) * elem_count_per_fature_map + elem_id; + output[offset] = original_input[offset]; + } +} + +__global__ void local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_hess_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights_squared, + float central_mult, + int input_feature_map_count, + int affected_feature_map_count, + int window_height, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights_squared; + float res = *current_input * *current_weights; + #pragma unroll 4 + for(int i = 1; i < window_height; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + int offset = ((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x; + output[offset] = output[offset] * central_mult + res; + } +} + +template +__global__ void local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights_squared, + float central_mult, + int input_feature_map_count, + int affected_feature_map_count, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights_squared; + float res = *current_input * *current_weights; + #pragma unroll + for(int i = 1; i < WINDOW_HEIGHT; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + int offset = ((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x; + output[offset] = output[offset] * central_mult + res; + } +} + +namespace nnforge +{ + namespace cuda + { + local_contrast_subtractive_2d_layer_hessian_cuda::local_contrast_subtractive_2d_layer_hessian_cuda() + { + } + + local_contrast_subtractive_2d_layer_hessian_cuda::~local_contrast_subtractive_2d_layer_hessian_cuda() + { + } + + void local_contrast_subtractive_2d_layer_hessian_cuda::enqueue_test( + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + const std::vector& additional_buffers, + unsigned int entry_count) + { + std::pair kernel_1st_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[0]) + { + case 1: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<1><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<2><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<3><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<4><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<5><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<6><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<7><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<8><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<9><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<10><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_blur_horizontal_hess_kernel<<>>( + *input_neurons_buffer, + *additional_buffers[0], + *schema_data[0], + *schema_data[1], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[0], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + + std::pair kernel_2nd_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[1]) + { + case 1: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<1><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<2><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<3><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<4><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<5><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<6><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<7><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<8><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<9><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_hess_kernel<10><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_blur_vertical_and_subtract_hess_kernel<<>>( + *additional_buffers[0], + *input_neurons_buffer, + *output_neurons_buffer, + *schema_data[0], + *schema_data[2], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[1], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + + if (unaffected_feature_map_count > 0) + { + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_elem_count_per_feature_map, + unaffected_feature_map_count, + entry_count); + local_contrast_subtractive_2d_copy_unaffected_hess_kernel<<>>( + *input_neurons_buffer, + *output_neurons_buffer, + *schema_data[5], + input_configuration_specific.feature_map_count, + unaffected_feature_map_count, + input_elem_count_per_feature_map, + entry_count); + } + } + + void local_contrast_subtractive_2d_layer_hessian_cuda::enqueue_backprop( + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_errors_buffer, + cuda_linear_buffer_device_smart_ptr input_errors_buffer, + const std::vector& additional_buffers, + unsigned int entry_count) + { + std::pair kernel_1st_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[0]) + { + case 1: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<1><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<2><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<3><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<4><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<5><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<6><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<7><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<8><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<9><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_blur_horizontal_exact_hess_kernel<10><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[3], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_blur_horizontal_hess_kernel<<>>( + *output_errors_buffer, + *additional_buffers[0], + *schema_data[0], + *schema_data[3], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[0], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + + std::pair kernel_2nd_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[1]) + { + case 1: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<1><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<2><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<3><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<4><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<5><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<6><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<7><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<8><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<9><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_exact_hess_kernel<10><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[4], central_mult, input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_add_hess_kernel<<>>( + *additional_buffers[0], + *output_errors_buffer, + *schema_data[0], + *schema_data[4], + central_mult, + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[1], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + } + + void local_contrast_subtractive_2d_layer_hessian_cuda::hessian_configured() + { + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + + affected_feature_map_count = static_cast(layer_derived->feature_maps_affected.size()); + unaffected_feature_map_count = static_cast(layer_derived->feature_maps_unaffected.size()); + + for(std::vector::const_iterator it = layer_derived->window_sizes.begin(); it != layer_derived->window_sizes.end(); ++it) + half_window_sizes.push_back(static_cast((*it + 1) >> 1)); + + central_mult = 1.0F - (2.0F * layer_derived->window_weights_list[0][0] * layer_derived->window_weights_list[1][0]); + } + + std::vector local_contrast_subtractive_2d_layer_hessian_cuda::get_sizes_of_additional_buffers_per_entry() const + { + std::vector res; + + res.push_back(input_elem_count_per_feature_map * affected_feature_map_count * sizeof(float)); + + return res; + } + + bool local_contrast_subtractive_2d_layer_hessian_cuda::is_in_place_backprop() const + { + return true; + } + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_2d_layer_hessian_cuda.h b/nnforge/cuda/local_contrast_subtractive_2d_layer_hessian_cuda.h new file mode 100644 index 0000000..6c79875 --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_2d_layer_hessian_cuda.h @@ -0,0 +1,65 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "layer_hessian_cuda.h" + +namespace nnforge +{ + namespace cuda + { + class local_contrast_subtractive_2d_layer_hessian_cuda : public layer_hessian_cuda + { + public: + local_contrast_subtractive_2d_layer_hessian_cuda(); + + virtual ~local_contrast_subtractive_2d_layer_hessian_cuda(); + + virtual void enqueue_test( + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + const std::vector& additional_buffers, + unsigned int entry_count); + + virtual void enqueue_backprop( + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_errors_buffer, + cuda_linear_buffer_device_smart_ptr input_errors_buffer, + const std::vector& additional_buffers, + unsigned int entry_count); + + protected: + virtual bool is_in_place_backprop() const; + + virtual void hessian_configured(); + + virtual std::vector get_sizes_of_additional_buffers_per_entry() const; + + private: + int affected_feature_map_count; + int unaffected_feature_map_count; + std::vector half_window_sizes; + float central_mult; + }; + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_2d_layer_updater_cuda.cu b/nnforge/cuda/local_contrast_subtractive_2d_layer_updater_cuda.cu new file mode 100644 index 0000000..ad1a6a9 --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_2d_layer_updater_cuda.cu @@ -0,0 +1,618 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "local_contrast_subtractive_2d_layer_updater_cuda.h" + +#include "../local_contrast_subtractive_layer.h" +#include "../neural_network_exception.h" + +#include "util_cuda.h" + +__global__ void local_contrast_subtractive_2d_blur_horizontal_upd_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int window_width, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)(((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll 4 + for(int i = 1; i < window_width; ++i) + { + current_weights++; + if (i < x + 1) + current_input_low--; + if (i > x + 1) + current_input_low++; + if (i < width - x) + current_input_high++; + if (i > width - x) + current_input_high--; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + output[(z * height + y) * width + x] = res; + } +} + +template +__global__ void local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)(((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll + for(int i = 1; i < WINDOW_WIDTH; ++i) + { + current_weights++; + if (i < x + 1) + current_input_low--; + if (i > x + 1) + current_input_low++; + if (i < width - x) + current_input_high++; + if (i > width - x) + current_input_high--; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + output[(z * height + y) * width + x] = res; + } +} + +__global__ void local_contrast_subtractive_2d_blur_vertical_and_subtract_upd_kernel( + const float * __restrict input, + const float * __restrict original_input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int window_height, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll 4 + for(int i = 1; i < window_height; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + int offset = ((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x; + output[offset] = original_input[offset] - res; + } +} + +template +__global__ void local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel( + const float * __restrict input, + const float * __restrict original_input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll + for(int i = 1; i < WINDOW_HEIGHT; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + int offset = ((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x; + output[offset] = original_input[offset] - res; + } +} + +__global__ void local_contrast_subtractive_2d_copy_unaffected_upd_kernel( + const float * __restrict original_input, + float * __restrict output, + const unsigned int * __restrict unaffected_feature_map_list, + int input_feature_map_count, + int unaffected_feature_map_count, + int elem_count_per_fature_map, + int entry_count) +{ + int elem_id = blockIdx.x * blockDim.x + threadIdx.x; + int unaffected_feature_map_index = blockIdx.y * blockDim.y + threadIdx.y; + int entry_id = blockIdx.z * blockDim.z + threadIdx.z; + bool in_bounds = (elem_id < elem_count_per_fature_map) && (unaffected_feature_map_index < unaffected_feature_map_count) && (entry_id < entry_count); + if (in_bounds) + { + int unaffected_feature_map_id = unaffected_feature_map_list[unaffected_feature_map_index]; + int offset = (entry_id * input_feature_map_count + unaffected_feature_map_id) * elem_count_per_fature_map + elem_id; + output[offset] = original_input[offset]; + } +} + +__global__ void local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_upd_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int window_height, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll 4 + for(int i = 1; i < window_height; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + output[((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x] -= res; + } +} + +template +__global__ void local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel( + const float * __restrict input, + float * __restrict output, + const unsigned int * __restrict affected_feature_map_list, + const float * __restrict weights, + int input_feature_map_count, + int affected_feature_map_count, + int width, + int height, + int entry_count) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + int entry_id = z / affected_feature_map_count; + + bool in_bounds = (x < width) && (y < height) && (entry_id < entry_count); + if (in_bounds) + { + int affected_feature_map_index = z - (entry_id * affected_feature_map_count); + int affected_feature_map_id = affected_feature_map_list[affected_feature_map_index]; + const float * current_input = input + (int)((z * height + y) * width + x); + const float * current_input_low = current_input; + const float * current_input_high = current_input; + const float * current_weights = weights; + float res = *current_input * *current_weights; + #pragma unroll + for(int i = 1; i < WINDOW_HEIGHT; ++i) + { + current_weights++; + if (i < y + 1) + current_input_low -= width; + if (i > y + 1) + current_input_low += width; + if (i < height - y) + current_input_high += width; + if (i > height - y) + current_input_high -= width; + res += (*current_input_low + *current_input_high) * *current_weights; + } + + output[((entry_id * input_feature_map_count + affected_feature_map_id) * height + y) * width + x] -= res; + } +} + +namespace nnforge +{ + namespace cuda + { + local_contrast_subtractive_2d_layer_updater_cuda::local_contrast_subtractive_2d_layer_updater_cuda() + { + } + + local_contrast_subtractive_2d_layer_updater_cuda::~local_contrast_subtractive_2d_layer_updater_cuda() + { + } + + void local_contrast_subtractive_2d_layer_updater_cuda::enqueue_test( + unsigned int offset_input_entry_id, + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + const std::vector& additional_buffers, + std::vector& dynamic_memobjects, + unsigned int entry_count) + { + std::pair kernel_1st_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[0]) + { + case 1: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<1><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<2><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<3><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<4><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<5><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<6><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<7><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<8><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<9><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<10><<>>(*input_neurons_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_blur_horizontal_upd_kernel<<>>( + *input_neurons_buffer, + *additional_buffers[0], + *schema_data[0], + *schema_data[1], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[0], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + + std::pair kernel_2nd_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[1]) + { + case 1: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<1><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<2><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<3><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<4><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<5><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<6><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<7><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<8><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<9><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_blur_vertical_and_subtract_exact_upd_kernel<10><<>>(*additional_buffers[0], *input_neurons_buffer, *output_neurons_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_blur_vertical_and_subtract_upd_kernel<<>>( + *additional_buffers[0], + *input_neurons_buffer, + *output_neurons_buffer, + *schema_data[0], + *schema_data[2], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[1], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + + if (unaffected_feature_map_count > 0) + { + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_elem_count_per_feature_map, + unaffected_feature_map_count, + entry_count); + local_contrast_subtractive_2d_copy_unaffected_upd_kernel<<>>( + *input_neurons_buffer, + *output_neurons_buffer, + *schema_data[3], + input_configuration_specific.feature_map_count, + unaffected_feature_map_count, + input_elem_count_per_feature_map, + entry_count); + } + } + + void local_contrast_subtractive_2d_layer_updater_cuda::enqueue_backprop( + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_errors_buffer, + cuda_linear_buffer_device_smart_ptr input_errors_buffer, + const std::vector& additional_buffers, + std::vector& dynamic_memobjects, + unsigned int entry_count) + { + std::pair kernel_1st_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[0]) + { + case 1: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<1><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<2><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<3><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<4><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<5><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<6><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<7><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<8><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<9><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_blur_horizontal_exact_upd_kernel<10><<>>(*output_errors_buffer, *additional_buffers[0], *schema_data[0], *schema_data[1], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_blur_horizontal_upd_kernel<<>>( + *output_errors_buffer, + *additional_buffers[0], + *schema_data[0], + *schema_data[1], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[0], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + + std::pair kernel_2nd_dims = cuda_util::get_grid_and_threadblock_sizes_2d_access( + *cuda_config, + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + affected_feature_map_count * entry_count); + switch(half_window_sizes[1]) + { + case 1: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<1><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 2: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<2><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 3: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<3><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 4: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<4><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 5: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<5><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 6: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<6><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 7: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<7><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 8: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<8><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 9: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<9><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + case 10: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_exact_upd_kernel<10><<>>(*additional_buffers[0], *output_errors_buffer, *schema_data[0], *schema_data[2], input_configuration_specific.feature_map_count, affected_feature_map_count, input_configuration_specific.dimension_sizes[0], input_configuration_specific.dimension_sizes[1], entry_count); + break; + default: + local_contrast_subtractive_2d_square_deriviative_blur_vertical_and_subtract_upd_kernel<<>>( + *additional_buffers[0], + *output_errors_buffer, + *schema_data[0], + *schema_data[4], + input_configuration_specific.feature_map_count, + affected_feature_map_count, + half_window_sizes[1], + input_configuration_specific.dimension_sizes[0], + input_configuration_specific.dimension_sizes[1], + entry_count); + break; + } + } + + void local_contrast_subtractive_2d_layer_updater_cuda::updater_configured() + { + if (!different_input) + throw neural_network_exception("hyperbolic_tangent_layer_updater_cuda is not able to run using the same input"); + + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + + affected_feature_map_count = static_cast(layer_derived->feature_maps_affected.size()); + unaffected_feature_map_count = static_cast(layer_derived->feature_maps_unaffected.size()); + + for(std::vector::const_iterator it = layer_derived->window_sizes.begin(); it != layer_derived->window_sizes.end(); ++it) + half_window_sizes.push_back(static_cast((*it + 1) >> 1)); + + central_mult = 1.0F - (2.0F * layer_derived->window_weights_list[0][0] * layer_derived->window_weights_list[1][0]); + } + + std::vector local_contrast_subtractive_2d_layer_updater_cuda::get_sizes_of_additional_buffers_per_entry() const + { + std::vector res; + + res.push_back(input_elem_count_per_feature_map * affected_feature_map_count * sizeof(float)); + + return res; + } + + bool local_contrast_subtractive_2d_layer_updater_cuda::is_in_place_backprop() const + { + return true; + } + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_2d_layer_updater_cuda.h b/nnforge/cuda/local_contrast_subtractive_2d_layer_updater_cuda.h new file mode 100644 index 0000000..84a46e2 --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_2d_layer_updater_cuda.h @@ -0,0 +1,69 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "layer_updater_cuda.h" + +namespace nnforge +{ + namespace cuda + { + class local_contrast_subtractive_2d_layer_updater_cuda : public layer_updater_cuda + { + public: + local_contrast_subtractive_2d_layer_updater_cuda(); + + virtual ~local_contrast_subtractive_2d_layer_updater_cuda(); + + virtual void enqueue_test( + unsigned int offset_input_entry_id, + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + const std::vector& additional_buffers, + std::vector& dynamic_memobjects, + unsigned int entry_count); + + virtual void enqueue_backprop( + cudaStream_t stream_id, + const std::vector& schema_data, + const std::vector& data, + const_cuda_linear_buffer_device_smart_ptr output_neurons_buffer, + const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, + cuda_linear_buffer_device_smart_ptr output_errors_buffer, + cuda_linear_buffer_device_smart_ptr input_errors_buffer, + const std::vector& additional_buffers, + std::vector& dynamic_memobjects, + unsigned int entry_count); + + protected: + virtual void updater_configured(); + + virtual bool is_in_place_backprop() const; + + virtual std::vector get_sizes_of_additional_buffers_per_entry() const; + + private: + int affected_feature_map_count; + int unaffected_feature_map_count; + std::vector half_window_sizes; + float central_mult; + }; + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_layer_hessian_schema.cpp b/nnforge/cuda/local_contrast_subtractive_layer_hessian_schema.cpp new file mode 100644 index 0000000..bcbe99a --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_layer_hessian_schema.cpp @@ -0,0 +1,113 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "local_contrast_subtractive_layer_hessian_schema.h" + +#include "../local_contrast_subtractive_layer.h" +#include "../neural_network_exception.h" +#include "local_contrast_subtractive_2d_layer_hessian_cuda.h" + +#include + +namespace nnforge +{ + namespace cuda + { + local_contrast_subtractive_layer_hessian_schema::local_contrast_subtractive_layer_hessian_schema() + { + } + + local_contrast_subtractive_layer_hessian_schema::~local_contrast_subtractive_layer_hessian_schema() + { + } + + std::tr1::shared_ptr local_contrast_subtractive_layer_hessian_schema::create_specific() const + { + return layer_hessian_schema_smart_ptr(new local_contrast_subtractive_layer_hessian_schema()); + } + + const boost::uuids::uuid& local_contrast_subtractive_layer_hessian_schema::get_uuid() const + { + return local_contrast_subtractive_layer::layer_guid; + } + + layer_hessian_cuda_smart_ptr local_contrast_subtractive_layer_hessian_schema::create_hessian_specific( + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific) const + { + layer_hessian_cuda_smart_ptr res; + + switch (output_configuration_specific.dimension_sizes.size()) + { + case 2: + res = layer_hessian_cuda_smart_ptr(new local_contrast_subtractive_2d_layer_hessian_cuda()); + break; + default: + throw neural_network_exception((boost::format("No CUDA hessian for the local contrast subtractive layer of %1% dimensions") % output_configuration_specific.dimension_sizes.size()).str()); + break; + } + + return res; + } + + std::vector local_contrast_subtractive_layer_hessian_schema::get_schema_buffers() const + { + std::vector res; + + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*layer_derived->feature_maps_affected.begin()), + layer_derived->feature_maps_affected.size() * sizeof(unsigned int))) + ); + + for(std::vector >::const_iterator it = layer_derived->window_weights_list.begin(); it != layer_derived->window_weights_list.end(); ++it) + { + const std::vector& current_weights = *it; + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*current_weights.begin()), + current_weights.size() * sizeof(float))) + ); + } + + for(std::vector >::const_iterator it = layer_derived->window_weights_list.begin(); it != layer_derived->window_weights_list.end(); ++it) + { + const std::vector& current_weights = *it; + std::vector current_weights_squared; + for(std::vector::const_iterator it2 = current_weights.begin(); it2 != current_weights.end(); ++it2) + current_weights_squared.push_back(*it2 * *it2); + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*current_weights_squared.begin()), + current_weights_squared.size() * sizeof(float))) + ); + } + + if (!layer_derived->feature_maps_unaffected.empty()) + { + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*layer_derived->feature_maps_unaffected.begin()), + layer_derived->feature_maps_unaffected.size() * sizeof(unsigned int))) + ); + } + + return res; + } + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_layer_hessian_schema.h b/nnforge/cuda/local_contrast_subtractive_layer_hessian_schema.h new file mode 100644 index 0000000..71e8ef2 --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_layer_hessian_schema.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "layer_hessian_schema.h" + +#include + +namespace nnforge +{ + namespace cuda + { + class local_contrast_subtractive_layer_hessian_schema : public layer_hessian_schema + { + public: + local_contrast_subtractive_layer_hessian_schema(); + + virtual ~local_contrast_subtractive_layer_hessian_schema(); + + virtual const boost::uuids::uuid& get_uuid() const; + + virtual std::vector get_schema_buffers() const; + + protected: + virtual std::tr1::shared_ptr create_specific() const; + + virtual layer_hessian_cuda_smart_ptr create_hessian_specific( + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific) const; + }; + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_layer_updater_schema.cpp b/nnforge/cuda/local_contrast_subtractive_layer_updater_schema.cpp new file mode 100644 index 0000000..661ed6f --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_layer_updater_schema.cpp @@ -0,0 +1,100 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "local_contrast_subtractive_layer_updater_schema.h" + +#include "../local_contrast_subtractive_layer.h" +#include "../neural_network_exception.h" +#include "local_contrast_subtractive_2d_layer_updater_cuda.h" + +#include + +namespace nnforge +{ + namespace cuda + { + local_contrast_subtractive_layer_updater_schema::local_contrast_subtractive_layer_updater_schema() + { + } + + local_contrast_subtractive_layer_updater_schema::~local_contrast_subtractive_layer_updater_schema() + { + } + + std::tr1::shared_ptr local_contrast_subtractive_layer_updater_schema::create_specific() const + { + return layer_updater_schema_smart_ptr(new local_contrast_subtractive_layer_updater_schema()); + } + + const boost::uuids::uuid& local_contrast_subtractive_layer_updater_schema::get_uuid() const + { + return local_contrast_subtractive_layer::layer_guid; + } + + layer_updater_cuda_smart_ptr local_contrast_subtractive_layer_updater_schema::create_updater_specific( + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific) const + { + layer_updater_cuda_smart_ptr res; + + switch (output_configuration_specific.dimension_sizes.size()) + { + case 2: + res = layer_updater_cuda_smart_ptr(new local_contrast_subtractive_2d_layer_updater_cuda()); + break; + default: + throw neural_network_exception((boost::format("No CUDA updater for the local contrast subtractive layer of %1% dimensions") % output_configuration_specific.dimension_sizes.size()).str()); + break; + } + + return res; + } + + std::vector local_contrast_subtractive_layer_updater_schema::get_schema_buffers() const + { + std::vector res; + + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*layer_derived->feature_maps_affected.begin()), + layer_derived->feature_maps_affected.size() * sizeof(unsigned int))) + ); + + for(std::vector >::const_iterator it = layer_derived->window_weights_list.begin(); it != layer_derived->window_weights_list.end(); ++it) + { + const std::vector& current_weights = *it; + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*current_weights.begin()), + current_weights.size() * sizeof(float))) + ); + } + + if (!layer_derived->feature_maps_unaffected.empty()) + { + res.push_back( + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( + &(*layer_derived->feature_maps_unaffected.begin()), + layer_derived->feature_maps_unaffected.size() * sizeof(unsigned int))) + ); + } + + return res; + } + } +} diff --git a/nnforge/cuda/local_contrast_subtractive_layer_updater_schema.h b/nnforge/cuda/local_contrast_subtractive_layer_updater_schema.h new file mode 100644 index 0000000..2aaf2f1 --- /dev/null +++ b/nnforge/cuda/local_contrast_subtractive_layer_updater_schema.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "layer_updater_schema.h" + +#include + +namespace nnforge +{ + namespace cuda + { + class local_contrast_subtractive_layer_updater_schema : public layer_updater_schema + { + public: + local_contrast_subtractive_layer_updater_schema(); + + virtual ~local_contrast_subtractive_layer_updater_schema(); + + virtual const boost::uuids::uuid& get_uuid() const; + + virtual std::vector get_schema_buffers() const; + + protected: + virtual std::tr1::shared_ptr create_specific() const; + + virtual layer_updater_cuda_smart_ptr create_updater_specific( + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific) const; + }; + } +} diff --git a/nnforge/local_contrast_subtractive_layer.cpp b/nnforge/local_contrast_subtractive_layer.cpp index 2efd135..e40630e 100644 --- a/nnforge/local_contrast_subtractive_layer.cpp +++ b/nnforge/local_contrast_subtractive_layer.cpp @@ -47,15 +47,16 @@ namespace nnforge if (window_sizes.size() == 0) throw neural_network_exception("window sizes for local contrast subtractive layer may not be empty"); - if (feature_maps_affected.empty()) - throw neural_network_exception("affected feature map list for local contrast subtractive layer may not be empty"); - for(unsigned int i = 0; i < window_sizes.size(); i++) { if (window_sizes[i] == 0) throw neural_network_exception("window dimension for local contrast subtractive layer may not be zero"); } + if (this->feature_maps_affected.empty()) + for(unsigned int i = 0; i < feature_map_count; i++) + this->feature_maps_affected.push_back(i); + std::sort(this->feature_maps_affected.begin(), this->feature_maps_affected.end()); for(unsigned int i = 0; i < feature_map_count; i++) { diff --git a/nnforge/plain/hessian_calculator_plain.cpp b/nnforge/plain/hessian_calculator_plain.cpp index f0c793d..cf5babc 100644 --- a/nnforge/plain/hessian_calculator_plain.cpp +++ b/nnforge/plain/hessian_calculator_plain.cpp @@ -87,7 +87,7 @@ namespace nnforge } } - unsigned int max_entry_count = std::min(plain_config->get_max_entry_count(buffers_config), 5/*hessian_entry_to_process_count*/); + unsigned int max_entry_count = std::min(plain_config->get_max_entry_count(buffers_config), hessian_entry_to_process_count); std::vector input_buf(max_entry_count * input_neuron_count * input_neuron_elem_size); additional_buffer_smart_ptr initial_error_buf(new std::vector(max_entry_count * output_neuron_count)); diff --git a/nnforge/plain/local_contrast_subtractive_layer_hessian_plain.cpp b/nnforge/plain/local_contrast_subtractive_layer_hessian_plain.cpp new file mode 100644 index 0000000..ac48f82 --- /dev/null +++ b/nnforge/plain/local_contrast_subtractive_layer_hessian_plain.cpp @@ -0,0 +1,313 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "local_contrast_subtractive_layer_hessian_plain.h" + +#ifdef _OPENMP +#include +#endif + +#include "../local_contrast_subtractive_layer.h" + +namespace nnforge +{ + namespace plain + { + local_contrast_subtractive_layer_hessian_plain::local_contrast_subtractive_layer_hessian_plain() + { + } + + local_contrast_subtractive_layer_hessian_plain::~local_contrast_subtractive_layer_hessian_plain() + { + } + + const boost::uuids::uuid& local_contrast_subtractive_layer_hessian_plain::get_uuid() const + { + return local_contrast_subtractive_layer::layer_guid; + } + + void local_contrast_subtractive_layer_hessian_plain::test( + const_additional_buffer_smart_ptr input_buffer, + additional_buffer_smart_ptr output_buffer, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const_layer_data_smart_ptr data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int entry_count) const + { + const unsigned int input_neuron_count = input_configuration_specific.get_neuron_count(); + const unsigned int input_neuron_count_per_feature_map = input_configuration_specific.get_neuron_count_per_feature_map(); + const unsigned int output_neuron_count = output_configuration_specific.get_neuron_count(); + const unsigned int output_neuron_count_per_feature_map = output_configuration_specific.get_neuron_count_per_feature_map(); + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + const std::vector >& window_weights_list = layer_derived->window_weights_list; + const std::vector& feature_maps_affected = layer_derived->feature_maps_affected; + const std::vector& feature_maps_unaffected = layer_derived->feature_maps_unaffected; + const unsigned int dimension_count = static_cast(window_weights_list.size()); + std::vector input_slices(input_configuration_specific.dimension_sizes.size()); + input_slices[0] = 1; + for(unsigned int i = 0; i < dimension_count - 1; ++i) + input_slices[i + 1] = input_slices[i] * input_configuration_specific.dimension_sizes[i]; + + const std::vector::const_iterator dimension_sizes_it = output_configuration_specific.dimension_sizes.begin(); + const unsigned int feature_maps_affected_count = static_cast(feature_maps_affected.size()); + const unsigned int feature_maps_unaffected_count = static_cast(feature_maps_affected.size()); + const std::vector::const_iterator input_slices_it = input_slices.begin(); + const std::vector::const_iterator feature_maps_affected_it = feature_maps_affected.begin(); + const std::vector::const_iterator input_buffer_it = input_buffer->begin(); + const std::vector::iterator output_buffer_it = output_buffer->begin(); + const std::vector >::const_iterator window_weights_list_it = window_weights_list.begin(); + + const int total_workload = entry_count * feature_maps_affected_count; + const int openmp_thread_count = plain_config->openmp_thread_count; + + #pragma omp parallel default(none) shared(additional_buffers) num_threads(openmp_thread_count) + { + std::vector local_additional_buffers; + int thread_id = 0; + #ifdef _OPENMP + thread_id = omp_get_thread_num(); + #endif + + local_additional_buffers.push_back(additional_buffers[thread_id]); + if (dimension_count > 1) + local_additional_buffers.push_back(additional_buffers[openmp_thread_count + thread_id]); + + #pragma omp for schedule(guided) + for(int workload_id = 0; workload_id < total_workload; ++workload_id) + { + int entry_id = workload_id / feature_maps_affected_count; + int affected_feature_map_id = workload_id - (entry_id * feature_maps_affected_count); + + unsigned int current_output_buffer_index = 0; + unsigned int feature_map_id = *(feature_maps_affected_it + affected_feature_map_id); + for(unsigned int dimension_id = 0; dimension_id < dimension_count; ++dimension_id) + { + std::vector::iterator out_it_base = local_additional_buffers[current_output_buffer_index]->begin(); + std::vector::const_iterator in_it; + if (dimension_id > 0) + in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + else + in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + int max_output_size = *(dimension_sizes_it + dimension_id); + int input_slice_size = *(input_slices_it + dimension_id); + + std::vector current_output_position(dimension_count, 0); + for(std::vector::iterator out_it = out_it_base; out_it != out_it_base + output_neuron_count_per_feature_map; ++out_it, ++in_it) + { + const std::vector& current_window_weights_list = *(window_weights_list_it + dimension_id); + float sum = *in_it * current_window_weights_list[0]; + + int current_position = static_cast(current_output_position[dimension_id]); + int dest_forward = current_position; + int dest_backward = dest_forward; + for (std::vector::const_iterator it = current_window_weights_list.begin() + 1; it != current_window_weights_list.end(); ++it) + { + dest_forward++; + dest_backward--; + int dest_forward_actual = (dest_forward < max_output_size) ? dest_forward : (((max_output_size << 1) - 1) - dest_forward); + int dest_backward_actual = (dest_backward >= 0) ? dest_backward : (-1 - dest_backward); + int offset_forward = ((dest_forward_actual - current_position) * input_slice_size); + int offset_backward = ((dest_backward_actual - current_position) * input_slice_size); + sum += (*(in_it + offset_forward) + *(in_it + offset_backward)) * (*it); + } + + *out_it = sum; + + // Go to the next output element + for(unsigned int i = 0; i < dimension_count; ++i) + { + if ((++current_output_position[i]) < *(dimension_sizes_it + i)) + break; + current_output_position[i] = 0; + } + } + + current_output_buffer_index = 1 - current_output_buffer_index; + } // for(unsigned int dimension_id + + // Subtract the gaussian blur + { + std::vector::const_iterator original_in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::iterator out_it = output_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::const_iterator in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + for(int i = 0; i < static_cast(input_neuron_count_per_feature_map); ++i) + *(out_it + i) = *(original_in_it + i) - *(in_it + i); + } + } + } // #pragma parallel + + if (feature_maps_unaffected_count > 0) + { + for(unsigned int entry_id = 0; entry_id < entry_count; ++entry_id) + { + for(std::vector::const_iterator it = feature_maps_unaffected.begin(); it != feature_maps_unaffected.end(); ++it) + { + unsigned int feature_map_id = *it; + std::vector::const_iterator original_in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::iterator out_it = output_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::copy(original_in_it, original_in_it + input_neuron_count_per_feature_map, out_it); + } + } + } + } + + void local_contrast_subtractive_layer_hessian_plain::backprop( + additional_buffer_smart_ptr input_errors, + const_additional_buffer_smart_ptr output_errors, + const_additional_buffer_smart_ptr output_neurons, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const_layer_data_smart_ptr data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int entry_count) const + { + const unsigned int input_neuron_count = input_configuration_specific.get_neuron_count(); + const unsigned int input_neuron_count_per_feature_map = input_configuration_specific.get_neuron_count_per_feature_map(); + const unsigned int output_neuron_count = output_configuration_specific.get_neuron_count(); + const unsigned int output_neuron_count_per_feature_map = output_configuration_specific.get_neuron_count_per_feature_map(); + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + const std::vector >& window_weights_list = layer_derived->window_weights_list; + const std::vector& feature_maps_affected = layer_derived->feature_maps_affected; + const unsigned int dimension_count = static_cast(window_weights_list.size()); + std::vector input_slices(input_configuration_specific.dimension_sizes.size()); + input_slices[0] = 1; + for(unsigned int i = 0; i < dimension_count - 1; ++i) + input_slices[i + 1] = input_slices[i] * input_configuration_specific.dimension_sizes[i]; + + const std::vector::const_iterator dimension_sizes_it = output_configuration_specific.dimension_sizes.begin(); + const unsigned int feature_maps_affected_count = static_cast(feature_maps_affected.size()); + const std::vector::const_iterator input_slices_it = input_slices.begin(); + const std::vector::const_iterator feature_maps_affected_it = feature_maps_affected.begin(); + const std::vector::iterator input_buffer_it = input_errors->begin(); + const std::vector >::const_iterator window_weights_list_it = window_weights_list.begin(); + + float central_weight = 1.0F; + for(std::vector >::const_iterator it = window_weights_list.begin(); it != window_weights_list.end(); ++it) + central_weight *= it->at(0); + const float const_central_mult = 1 - (2.0F * central_weight); + + const int total_workload = entry_count * feature_maps_affected_count; + const int openmp_thread_count = plain_config->openmp_thread_count; + + #pragma omp parallel default(none) shared(additional_buffers) num_threads(openmp_thread_count) + { + std::vector local_additional_buffers; + int thread_id = 0; + #ifdef _OPENMP + thread_id = omp_get_thread_num(); + #endif + + local_additional_buffers.push_back(additional_buffers[thread_id]); + if (dimension_count > 1) + local_additional_buffers.push_back(additional_buffers[openmp_thread_count + thread_id]); + + #pragma omp for schedule(guided) + for(int workload_id = 0; workload_id < total_workload; ++workload_id) + { + int entry_id = workload_id / feature_maps_affected_count; + int affected_feature_map_id = workload_id - (entry_id * feature_maps_affected_count); + + unsigned int current_output_buffer_index = 0; + unsigned int feature_map_id = *(feature_maps_affected_it + affected_feature_map_id); + for(unsigned int dimension_id = 0; dimension_id < dimension_count; ++dimension_id) + { + std::vector::iterator out_it_base = local_additional_buffers[current_output_buffer_index]->begin(); + std::vector::const_iterator in_it; + if (dimension_id > 0) + in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + else + in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + int max_output_size = *(dimension_sizes_it + dimension_id); + int input_slice_size = *(input_slices_it + dimension_id); + + std::vector current_output_position(dimension_count, 0); + for(std::vector::iterator out_it = out_it_base; out_it != out_it_base + output_neuron_count_per_feature_map; ++out_it, ++in_it) + { + const std::vector& current_window_weights_list = *(window_weights_list_it + dimension_id); + float weight = current_window_weights_list[0]; + float sum = *in_it * (weight * weight); + + int current_position = static_cast(current_output_position[dimension_id]); + int dest_forward = current_position; + int dest_backward = dest_forward; + for (std::vector::const_iterator it = current_window_weights_list.begin() + 1; it != current_window_weights_list.end(); ++it) + { + dest_forward++; + dest_backward--; + int dest_forward_actual = (dest_forward < max_output_size) ? dest_forward : (((max_output_size << 1) - 1) - dest_forward); + int dest_backward_actual = (dest_backward >= 0) ? dest_backward : (-1 - dest_backward); + int offset_forward = ((dest_forward_actual - current_position) * input_slice_size); + int offset_backward = ((dest_backward_actual - current_position) * input_slice_size); + float weight = *it; + sum += (*(in_it + offset_forward) + *(in_it + offset_backward)) * (weight * weight); + } + + *out_it = sum; + + // Go to the next output element + for(unsigned int i = 0; i < dimension_count; ++i) + { + if ((++current_output_position[i]) < *(dimension_sizes_it + i)) + break; + current_output_position[i] = 0; + } + } + + current_output_buffer_index = 1 - current_output_buffer_index; + } // for(unsigned int dimension_id + + { + std::vector::iterator out_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::const_iterator in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + for(int i = 0; i < static_cast(input_neuron_count_per_feature_map); ++i) + *(out_it + i) = (*(out_it + i) * const_central_mult) + *(in_it + i); + } + } + } // #pragma parallel + } + + std::vector > local_contrast_subtractive_layer_hessian_plain::get_elem_count_and_per_entry_flag_additional_buffers( + const_layer_smart_ptr layer_schema, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + plain_running_configuration_const_smart_ptr plain_config, + bool backprop_required) const + { + std::vector > res; + + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + unsigned int elem_count_per_intermediate_elem = static_cast(layer_derived->feature_maps_affected.size() * output_configuration_specific.get_neuron_count_per_feature_map()); + + for(int i = 0; i < plain_config->openmp_thread_count; ++i) + res.push_back(std::make_pair(elem_count_per_intermediate_elem, false)); + if (input_configuration_specific.dimension_sizes.size() > 1) + for(int i = 0; i < plain_config->openmp_thread_count; ++i) + res.push_back(std::make_pair(elem_count_per_intermediate_elem, false)); + + return res; + } + + bool local_contrast_subtractive_layer_hessian_plain::is_in_place_backprop() const + { + return true; + } + } +} diff --git a/nnforge/plain/local_contrast_subtractive_layer_hessian_plain.h b/nnforge/plain/local_contrast_subtractive_layer_hessian_plain.h new file mode 100644 index 0000000..c10850b --- /dev/null +++ b/nnforge/plain/local_contrast_subtractive_layer_hessian_plain.h @@ -0,0 +1,68 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "layer_hessian_plain.h" + +namespace nnforge +{ + namespace plain + { + class local_contrast_subtractive_layer_hessian_plain : public layer_hessian_plain + { + public: + local_contrast_subtractive_layer_hessian_plain(); + + virtual ~local_contrast_subtractive_layer_hessian_plain(); + + virtual const boost::uuids::uuid& get_uuid() const; + + virtual void test( + const_additional_buffer_smart_ptr input_buffer, + additional_buffer_smart_ptr output_buffer, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const_layer_data_smart_ptr data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int entry_count) const; + + virtual void backprop( + additional_buffer_smart_ptr input_errors, + const_additional_buffer_smart_ptr output_errors, + const_additional_buffer_smart_ptr output_neurons, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const_layer_data_smart_ptr data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int entry_count) const; + + protected: + virtual bool is_in_place_backprop() const; + + virtual std::vector > get_elem_count_and_per_entry_flag_additional_buffers( + const_layer_smart_ptr layer_schema, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + plain_running_configuration_const_smart_ptr plain_config, + bool backprop_required) const; + }; + } +} diff --git a/nnforge/plain/local_contrast_subtractive_layer_updater_plain.cpp b/nnforge/plain/local_contrast_subtractive_layer_updater_plain.cpp new file mode 100644 index 0000000..3ddb1e3 --- /dev/null +++ b/nnforge/plain/local_contrast_subtractive_layer_updater_plain.cpp @@ -0,0 +1,312 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "local_contrast_subtractive_layer_updater_plain.h" + +#ifdef _OPENMP +#include +#endif + +#include "../local_contrast_subtractive_layer.h" +#include "../neural_network_exception.h" + +namespace nnforge +{ + namespace plain + { + local_contrast_subtractive_layer_updater_plain::local_contrast_subtractive_layer_updater_plain() + { + } + + local_contrast_subtractive_layer_updater_plain::~local_contrast_subtractive_layer_updater_plain() + { + } + + const boost::uuids::uuid& local_contrast_subtractive_layer_updater_plain::get_uuid() const + { + return local_contrast_subtractive_layer::layer_guid; + } + + void local_contrast_subtractive_layer_updater_plain::test( + const_additional_buffer_smart_ptr input_buffer, + additional_buffer_smart_ptr output_buffer, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const layer_data_list& data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int updater_count, + int offset_input_entry_id) const + { + if (offset_input_entry_id >= 0) + throw neural_network_exception("local_contrast_subtractive_layer_updater_plain is not able to run using the same input"); + + const unsigned int input_neuron_count = input_configuration_specific.get_neuron_count(); + const unsigned int input_neuron_count_per_feature_map = input_configuration_specific.get_neuron_count_per_feature_map(); + const unsigned int output_neuron_count = output_configuration_specific.get_neuron_count(); + const unsigned int output_neuron_count_per_feature_map = output_configuration_specific.get_neuron_count_per_feature_map(); + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + const std::vector >& window_weights_list = layer_derived->window_weights_list; + const std::vector& feature_maps_affected = layer_derived->feature_maps_affected; + const std::vector& feature_maps_unaffected = layer_derived->feature_maps_unaffected; + const unsigned int dimension_count = static_cast(window_weights_list.size()); + std::vector input_slices(input_configuration_specific.dimension_sizes.size()); + input_slices[0] = 1; + for(unsigned int i = 0; i < dimension_count - 1; ++i) + input_slices[i + 1] = input_slices[i] * input_configuration_specific.dimension_sizes[i]; + + const std::vector::const_iterator dimension_sizes_it = output_configuration_specific.dimension_sizes.begin(); + const unsigned int feature_maps_affected_count = static_cast(feature_maps_affected.size()); + const unsigned int feature_maps_unaffected_count = static_cast(feature_maps_affected.size()); + const std::vector::const_iterator input_slices_it = input_slices.begin(); + const std::vector::const_iterator feature_maps_affected_it = feature_maps_affected.begin(); + const std::vector::const_iterator input_buffer_it = input_buffer->begin(); + const std::vector::iterator output_buffer_it = output_buffer->begin(); + const std::vector >::const_iterator window_weights_list_it = window_weights_list.begin(); + + const int total_workload = updater_count * feature_maps_affected_count; + const int openmp_thread_count = plain_config->openmp_thread_count; + + #pragma omp parallel default(none) shared(additional_buffers) num_threads(openmp_thread_count) + { + std::vector local_additional_buffers; + int thread_id = 0; + #ifdef _OPENMP + thread_id = omp_get_thread_num(); + #endif + + local_additional_buffers.push_back(additional_buffers[thread_id]); + if (dimension_count > 1) + local_additional_buffers.push_back(additional_buffers[openmp_thread_count + thread_id]); + + #pragma omp for schedule(guided) + for(int workload_id = 0; workload_id < total_workload; ++workload_id) + { + int entry_id = workload_id / feature_maps_affected_count; + int affected_feature_map_id = workload_id - (entry_id * feature_maps_affected_count); + + unsigned int current_output_buffer_index = 0; + unsigned int feature_map_id = *(feature_maps_affected_it + affected_feature_map_id); + for(unsigned int dimension_id = 0; dimension_id < dimension_count; ++dimension_id) + { + std::vector::iterator out_it_base = local_additional_buffers[current_output_buffer_index]->begin(); + std::vector::const_iterator in_it; + if (dimension_id > 0) + in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + else + in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + int max_output_size = *(dimension_sizes_it + dimension_id); + int input_slice_size = *(input_slices_it + dimension_id); + + std::vector current_output_position(dimension_count, 0); + for(std::vector::iterator out_it = out_it_base; out_it != out_it_base + output_neuron_count_per_feature_map; ++out_it, ++in_it) + { + const std::vector& current_window_weights_list = *(window_weights_list_it + dimension_id); + float sum = *in_it * current_window_weights_list[0]; + + int current_position = static_cast(current_output_position[dimension_id]); + int dest_forward = current_position; + int dest_backward = dest_forward; + for (std::vector::const_iterator it = current_window_weights_list.begin() + 1; it != current_window_weights_list.end(); ++it) + { + dest_forward++; + dest_backward--; + int dest_forward_actual = (dest_forward < max_output_size) ? dest_forward : (((max_output_size << 1) - 1) - dest_forward); + int dest_backward_actual = (dest_backward >= 0) ? dest_backward : (-1 - dest_backward); + int offset_forward = ((dest_forward_actual - current_position) * input_slice_size); + int offset_backward = ((dest_backward_actual - current_position) * input_slice_size); + sum += (*(in_it + offset_forward) + *(in_it + offset_backward)) * (*it); + } + + *out_it = sum; + + // Go to the next output element + for(unsigned int i = 0; i < dimension_count; ++i) + { + if ((++current_output_position[i]) < *(dimension_sizes_it + i)) + break; + current_output_position[i] = 0; + } + } + + current_output_buffer_index = 1 - current_output_buffer_index; + } // for(unsigned int dimension_id + + // Subtract the gaussian blur + { + std::vector::const_iterator original_in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::iterator out_it = output_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::const_iterator in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + for(int i = 0; i < static_cast(input_neuron_count_per_feature_map); ++i) + *(out_it + i) = *(original_in_it + i) - *(in_it + i); + } + } + } // #pragma parallel + + if (feature_maps_unaffected_count > 0) + { + for(unsigned int entry_id = 0; entry_id < updater_count; ++entry_id) + { + for(std::vector::const_iterator it = feature_maps_unaffected.begin(); it != feature_maps_unaffected.end(); ++it) + { + unsigned int feature_map_id = *it; + std::vector::const_iterator original_in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::iterator out_it = output_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::copy(original_in_it, original_in_it + input_neuron_count_per_feature_map, out_it); + } + } + } + } + + void local_contrast_subtractive_layer_updater_plain::backprop( + additional_buffer_smart_ptr input_errors, + const_additional_buffer_smart_ptr input_neurons, + const_additional_buffer_smart_ptr output_errors, + const_additional_buffer_smart_ptr output_neurons, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const layer_data_list& data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int updater_count) const + { + const unsigned int input_neuron_count = input_configuration_specific.get_neuron_count(); + const unsigned int input_neuron_count_per_feature_map = input_configuration_specific.get_neuron_count_per_feature_map(); + const unsigned int output_neuron_count = output_configuration_specific.get_neuron_count(); + const unsigned int output_neuron_count_per_feature_map = output_configuration_specific.get_neuron_count_per_feature_map(); + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + const std::vector >& window_weights_list = layer_derived->window_weights_list; + const std::vector& feature_maps_affected = layer_derived->feature_maps_affected; + const unsigned int dimension_count = static_cast(window_weights_list.size()); + std::vector input_slices(input_configuration_specific.dimension_sizes.size()); + input_slices[0] = 1; + for(unsigned int i = 0; i < dimension_count - 1; ++i) + input_slices[i + 1] = input_slices[i] * input_configuration_specific.dimension_sizes[i]; + + const std::vector::const_iterator dimension_sizes_it = output_configuration_specific.dimension_sizes.begin(); + const unsigned int feature_maps_affected_count = static_cast(feature_maps_affected.size()); + const std::vector::const_iterator input_slices_it = input_slices.begin(); + const std::vector::const_iterator feature_maps_affected_it = feature_maps_affected.begin(); + const std::vector::iterator input_buffer_it = input_errors->begin(); + const std::vector >::const_iterator window_weights_list_it = window_weights_list.begin(); + + const int total_workload = updater_count * feature_maps_affected_count; + const int openmp_thread_count = plain_config->openmp_thread_count; + + #pragma omp parallel default(none) shared(additional_buffers) num_threads(openmp_thread_count) + { + std::vector local_additional_buffers; + int thread_id = 0; + #ifdef _OPENMP + thread_id = omp_get_thread_num(); + #endif + + local_additional_buffers.push_back(additional_buffers[thread_id]); + if (dimension_count > 1) + local_additional_buffers.push_back(additional_buffers[openmp_thread_count + thread_id]); + + #pragma omp for schedule(guided) + for(int workload_id = 0; workload_id < total_workload; ++workload_id) + { + int entry_id = workload_id / feature_maps_affected_count; + int affected_feature_map_id = workload_id - (entry_id * feature_maps_affected_count); + + unsigned int current_output_buffer_index = 0; + unsigned int feature_map_id = *(feature_maps_affected_it + affected_feature_map_id); + for(unsigned int dimension_id = 0; dimension_id < dimension_count; ++dimension_id) + { + std::vector::iterator out_it_base = local_additional_buffers[current_output_buffer_index]->begin(); + std::vector::const_iterator in_it; + if (dimension_id > 0) + in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + else + in_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + int max_output_size = *(dimension_sizes_it + dimension_id); + int input_slice_size = *(input_slices_it + dimension_id); + + std::vector current_output_position(dimension_count, 0); + for(std::vector::iterator out_it = out_it_base; out_it != out_it_base + output_neuron_count_per_feature_map; ++out_it, ++in_it) + { + const std::vector& current_window_weights_list = *(window_weights_list_it + dimension_id); + float sum = *in_it * current_window_weights_list[0]; + + int current_position = static_cast(current_output_position[dimension_id]); + int dest_forward = current_position; + int dest_backward = dest_forward; + for (std::vector::const_iterator it = current_window_weights_list.begin() + 1; it != current_window_weights_list.end(); ++it) + { + dest_forward++; + dest_backward--; + int dest_forward_actual = (dest_forward < max_output_size) ? dest_forward : (((max_output_size << 1) - 1) - dest_forward); + int dest_backward_actual = (dest_backward >= 0) ? dest_backward : (-1 - dest_backward); + int offset_forward = ((dest_forward_actual - current_position) * input_slice_size); + int offset_backward = ((dest_backward_actual - current_position) * input_slice_size); + sum += (*(in_it + offset_forward) + *(in_it + offset_backward)) * (*it); + } + + *out_it = sum; + + // Go to the next output element + for(unsigned int i = 0; i < dimension_count; ++i) + { + if ((++current_output_position[i]) < *(dimension_sizes_it + i)) + break; + current_output_position[i] = 0; + } + } + + current_output_buffer_index = 1 - current_output_buffer_index; + } // for(unsigned int dimension_id + + { + std::vector::iterator out_it = input_buffer_it + (entry_id * input_neuron_count) + (feature_map_id * input_neuron_count_per_feature_map); + std::vector::const_iterator in_it = local_additional_buffers[1 - current_output_buffer_index]->begin(); + for(int i = 0; i < static_cast(input_neuron_count_per_feature_map); ++i) + *(out_it + i) -= *(in_it + i); + } + } + } // #pragma parallel + } + + std::vector > local_contrast_subtractive_layer_updater_plain::get_elem_count_and_per_entry_flag_additional_buffers( + const_layer_smart_ptr layer_schema, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + plain_running_configuration_const_smart_ptr plain_config, + bool backprop_required) const + { + std::vector > res; + + std::tr1::shared_ptr layer_derived = std::tr1::dynamic_pointer_cast(layer_schema); + unsigned int elem_count_per_intermediate_elem = static_cast(layer_derived->feature_maps_affected.size() * output_configuration_specific.get_neuron_count_per_feature_map()); + + for(int i = 0; i < plain_config->openmp_thread_count; ++i) + res.push_back(std::make_pair(elem_count_per_intermediate_elem, false)); + if (input_configuration_specific.dimension_sizes.size() > 1) + for(int i = 0; i < plain_config->openmp_thread_count; ++i) + res.push_back(std::make_pair(elem_count_per_intermediate_elem, false)); + + return res; + } + + bool local_contrast_subtractive_layer_updater_plain::is_in_place_backprop() const + { + return true; + } + } +} diff --git a/nnforge/plain/local_contrast_subtractive_layer_updater_plain.h b/nnforge/plain/local_contrast_subtractive_layer_updater_plain.h new file mode 100644 index 0000000..37719ae --- /dev/null +++ b/nnforge/plain/local_contrast_subtractive_layer_updater_plain.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "layer_updater_plain.h" + +namespace nnforge +{ + namespace plain + { + class local_contrast_subtractive_layer_updater_plain : public layer_updater_plain + { + public: + local_contrast_subtractive_layer_updater_plain(); + + virtual ~local_contrast_subtractive_layer_updater_plain(); + + virtual const boost::uuids::uuid& get_uuid() const; + + virtual void test( + const_additional_buffer_smart_ptr input_buffer, + additional_buffer_smart_ptr output_buffer, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const layer_data_list& data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int updater_count, + int offset_input_entry_id) const; + + virtual void backprop( + additional_buffer_smart_ptr input_errors, + const_additional_buffer_smart_ptr input_neurons, + const_additional_buffer_smart_ptr output_errors, + const_additional_buffer_smart_ptr output_neurons, + std::vector& additional_buffers, + plain_running_configuration_const_smart_ptr plain_config, + const_layer_smart_ptr layer_schema, + const layer_data_list& data, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + unsigned int updater_count) const; + + protected: + virtual bool is_in_place_backprop() const; + + virtual std::vector > get_elem_count_and_per_entry_flag_additional_buffers( + const_layer_smart_ptr layer_schema, + const layer_configuration_specific& input_configuration_specific, + const layer_configuration_specific& output_configuration_specific, + plain_running_configuration_const_smart_ptr plain_config, + bool backprop_required) const; + + private: + static const int max_dimension_count; + }; + } +} diff --git a/nnforge/plain/plain.cpp b/nnforge/plain/plain.cpp index b80c435..c381edb 100644 --- a/nnforge/plain/plain.cpp +++ b/nnforge/plain/plain.cpp @@ -34,6 +34,7 @@ #include "hyperbolic_tangent_layer_hessian_plain.h" #include "average_subsampling_layer_hessian_plain.h" #include "max_subsampling_layer_hessian_plain.h" +#include "local_contrast_subtractive_layer_hessian_plain.h" #include "convolution_layer_hessian_plain.h" #include "rectified_linear_layer_hessian_plain.h" #include "soft_rectified_linear_layer_hessian_plain.h" @@ -45,6 +46,7 @@ #include "hyperbolic_tangent_layer_updater_plain.h" #include "average_subsampling_layer_updater_plain.h" #include "max_subsampling_layer_updater_plain.h" +#include "local_contrast_subtractive_layer_updater_plain.h" #include "convolution_layer_updater_plain.h" #include "rectified_linear_layer_updater_plain.h" #include "soft_rectified_linear_layer_updater_plain.h" @@ -77,6 +79,7 @@ namespace nnforge single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new hyperbolic_tangent_layer_hessian_plain())); single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new average_subsampling_layer_hessian_plain())); single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new max_subsampling_layer_hessian_plain())); + single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new local_contrast_subtractive_layer_hessian_plain())); single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new convolution_layer_hessian_plain())); single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new rectified_linear_layer_hessian_plain())); single_layer_hessian_plain_factory::get_mutable_instance().register_layer_hessian_plain(layer_hessian_plain_smart_ptr(new soft_rectified_linear_layer_hessian_plain())); @@ -87,6 +90,7 @@ namespace nnforge single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new hyperbolic_tangent_layer_updater_plain())); single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new average_subsampling_layer_updater_plain())); single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new max_subsampling_layer_updater_plain())); + single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new local_contrast_subtractive_layer_updater_plain())); single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new convolution_layer_updater_plain())); single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new rectified_linear_layer_updater_plain())); single_layer_updater_plain_factory::get_mutable_instance().register_layer_updater_plain(layer_updater_plain_smart_ptr(new soft_rectified_linear_layer_updater_plain()));