diff --git a/Settings.mk b/Settings.mk index 3e01e01..9ba2fe6 100644 --- a/Settings.mk +++ b/Settings.mk @@ -5,8 +5,8 @@ OPENCV_PATH=/usr/local CUDA_PATH=/usr/local/cuda NVCC=nvcc NNFORGE_PATH=../.. -NNFORGE_INPUT_DATA_PATH=~/nnforge/input_data -NNFORGE_WORKING_DATA_PATH=~/nnforge/working_data +NNFORGE_INPUT_DATA_PATH=/home/max/nnforge/input_data +NNFORGE_WORKING_DATA_PATH=/home/max/nnforge/working_data BOOST_LIBS=-lboost_regex-mt -lboost_chrono-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_random-mt -lboost_system-mt -lboost_date_time-mt OPENCV_LIBS=-lopencv_highgui -lopencv_imgproc -lopencv_core diff --git a/examples/gtsrb/gtsrb_toolset.cpp b/examples/gtsrb/gtsrb_toolset.cpp index f52cc1d..44a057e 100644 --- a/examples/gtsrb/gtsrb_toolset.cpp +++ b/examples/gtsrb/gtsrb_toolset.cpp @@ -54,7 +54,10 @@ void gtsrb_toolset::prepare_data() void gtsrb_toolset::prepare_training_data() { - std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(get_working_data_folder() / training_data_filename, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); + boost::filesystem::path file_path = get_working_data_folder() / training_data_filename; + std::cout << "Writing data to " << file_path.string() << std::endl; + + std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); nnforge::layer_configuration_specific input_configuration; input_configuration.feature_map_count = is_color ? 
3 : 1; input_configuration.dimension_sizes.push_back(image_width); @@ -63,7 +66,7 @@ void gtsrb_toolset::prepare_training_data() output_configuration.feature_map_count = class_count; output_configuration.dimension_sizes.push_back(1); output_configuration.dimension_sizes.push_back(1); - nnforge::supervised_data_stream_writer_byte writer( + nnforge::supervised_data_stream_writer writer( file_with_data, input_configuration, output_configuration); @@ -83,7 +86,10 @@ void gtsrb_toolset::prepare_training_data() void gtsrb_toolset::prepare_validating_data() { - std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(get_working_data_folder() / validating_data_filename, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); + boost::filesystem::path file_path = get_working_data_folder() / validating_data_filename; + std::cout << "Writing data to " << file_path.string() << std::endl; + + std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); nnforge::layer_configuration_specific input_configuration; input_configuration.feature_map_count = is_color ? 
3 : 1; input_configuration.dimension_sizes.push_back(image_width); @@ -92,7 +98,7 @@ void gtsrb_toolset::prepare_validating_data() output_configuration.feature_map_count = class_count; output_configuration.dimension_sizes.push_back(1); output_configuration.dimension_sizes.push_back(1); - nnforge::supervised_data_stream_writer_byte writer( + nnforge::supervised_data_stream_writer writer( file_with_data, input_configuration, output_configuration); @@ -108,7 +114,7 @@ void gtsrb_toolset::prepare_validating_data() } void gtsrb_toolset::write_folder( - nnforge::supervised_data_stream_writer_byte& writer, + nnforge::supervised_data_stream_writer& writer, const boost::filesystem::path& relative_subfolder_path, const char * annotation_file_name, bool jitter) @@ -116,6 +122,8 @@ void gtsrb_toolset::write_folder( boost::filesystem::path subfolder_path = get_input_data_folder() / relative_subfolder_path; boost::filesystem::path annotation_file_path = subfolder_path / annotation_file_name; + std::cout << "Reading input data from " << subfolder_path.string() << std::endl; + boost::filesystem::ifstream file_input(annotation_file_path, std::ios_base::in); nnforge::random_generator generator = nnforge::rnd::get_random_generator(); @@ -188,7 +196,7 @@ void gtsrb_toolset::write_folder( } void gtsrb_toolset::write_signle_entry( - nnforge::supervised_data_stream_writer_byte& writer, + nnforge::supervised_data_stream_writer& writer, const boost::filesystem::path& absolute_file_path, unsigned int class_id, unsigned int roi_top_left_x, diff --git a/examples/gtsrb/gtsrb_toolset.h b/examples/gtsrb/gtsrb_toolset.h index 1243028..dfa0b9f 100644 --- a/examples/gtsrb/gtsrb_toolset.h +++ b/examples/gtsrb/gtsrb_toolset.h @@ -53,7 +53,7 @@ class gtsrb_toolset : public nnforge::neural_network_toolset void prepare_validating_data(); void write_signle_entry( - nnforge::supervised_data_stream_writer_byte& writer, + nnforge::supervised_data_stream_writer& writer, const boost::filesystem::path& 
absolute_file_path, unsigned int class_id, unsigned int roi_top_left_x, @@ -68,7 +68,7 @@ class gtsrb_toolset : public nnforge::neural_network_toolset float brightness_shift = 0.0F); void write_folder( - nnforge::supervised_data_stream_writer_byte& writer, + nnforge::supervised_data_stream_writer& writer, const boost::filesystem::path& relative_subfolder_path, const char * annotation_file_name, bool jitter); diff --git a/nnforge/cuda/cuda_linear_buffer.cpp b/nnforge/cuda/cuda_linear_buffer.cpp index 5c9f4cd..a5d52eb 100644 --- a/nnforge/cuda/cuda_linear_buffer.cpp +++ b/nnforge/cuda/cuda_linear_buffer.cpp @@ -68,6 +68,16 @@ namespace nnforge return (unsigned char *)(get_buf()); } + cuda_linear_buffer::operator uchar4 *() + { + return (uchar4 *)(get_buf()); + } + + cuda_linear_buffer::operator const uchar4 *() const + { + return (uchar4 *)(get_buf()); + } + cuda_linear_buffer::operator unsigned int *() { return (unsigned int *)(get_buf()); diff --git a/nnforge/cuda/cuda_linear_buffer.h b/nnforge/cuda/cuda_linear_buffer.h index 239af39..3994c24 100644 --- a/nnforge/cuda/cuda_linear_buffer.h +++ b/nnforge/cuda/cuda_linear_buffer.h @@ -45,6 +45,10 @@ namespace nnforge operator const unsigned char *() const; + operator uchar4 *(); + + operator const uchar4 *() const; + operator unsigned int *(); operator const unsigned int *() const; diff --git a/nnforge/cuda/cuda_util.cu b/nnforge/cuda/cuda_util.cu index a071154..cfd2fe0 100644 --- a/nnforge/cuda/cuda_util.cu +++ b/nnforge/cuda/cuda_util.cu @@ -288,7 +288,7 @@ namespace nnforge int total_thread_count, int divisible) { - int initial_threadblock_count = total_thread_count / 256; + int initial_threadblock_count = std::max(total_thread_count / 256, 1); int minimum_threadblock_count = cuda_config.multiprocessor_count * 8; if (initial_threadblock_count >= minimum_threadblock_count) diff --git a/nnforge/cuda/hessian_calculator_cuda.cu b/nnforge/cuda/hessian_calculator_cuda.cu index 2ca5b13..bdcdb11 100644 --- 
a/nnforge/cuda/hessian_calculator_cuda.cu +++ b/nnforge/cuda/hessian_calculator_cuda.cu @@ -29,24 +29,20 @@ #include __global__ void convert_compacted_to_raw_hess_kernel( - const unsigned char * __restrict input, - float * __restrict output, - const float * __restrict scale_addition, - const float * __restrict scale_multiplication, - int elem_count_per_feature_map, - int feature_map_count, - int entry_count) + const uchar4 * __restrict input, + float4 * __restrict output, + int elem_count) { - int elem_id_inside_feature_map = blockIdx.x * blockDim.x + threadIdx.x; - int feature_map_id = blockIdx.y * blockDim.y + threadIdx.y; - int entry_id = blockIdx.z * blockDim.z + threadIdx.z; - bool in_bounds = (entry_id < entry_count) && (elem_id_inside_feature_map < elem_count_per_feature_map) && (feature_map_id < feature_map_count); - if (in_bounds) + int elem_id = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x; + if (elem_id < elem_count) { - int offset = elem_count_per_feature_map * (entry_id * feature_map_count + feature_map_id) + elem_id_inside_feature_map; - unsigned char val = input[offset]; - float converted_val = ((val * (1.0F / 255.0F)) + scale_addition[feature_map_id]) * scale_multiplication[feature_map_id]; - output[offset] = converted_val; + uchar4 inp = input[elem_id]; + float4 val; + val.x = inp.x * (1.0F / 255.0F); + val.y = inp.y * (1.0F / 255.0F); + val.z = inp.z * (1.0F / 255.0F); + val.w = inp.w * (1.0F / 255.0F); + output[elem_id] = val; } } @@ -66,9 +62,8 @@ namespace nnforge { hessian_calculator_cuda::hessian_calculator_cuda( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, cuda_running_configuration_const_smart_ptr cuda_config) - : hessian_calculator(schema, scale_params) + : hessian_calculator(schema) , cuda_config(cuda_config) { const const_layer_list& layer_list = *schema; @@ -111,7 +106,7 @@ namespace nnforge } network_data_smart_ptr hessian_calculator_cuda::actual_get_hessian( - 
supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count) { @@ -125,7 +120,8 @@ namespace nnforge unsigned int input_neuron_count = input_configuration.get_neuron_count(); unsigned int input_neuron_count_per_feature_map = input_configuration.get_neuron_count_per_feature_map(); unsigned int output_neuron_count = output_configuration.get_neuron_count(); - unsigned int input_feature_map_count = input_configuration.feature_map_count; + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); std::vector > net_data = enqueue_get_data(data, *command_stream); std::vector > net_data_squared = enqueue_get_data_squared(net_data, *command_stream); @@ -134,8 +130,8 @@ namespace nnforge buffer_cuda_size_configuration buffers_config; update_buffers_configuration(buffers_config); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // initial error @@ -155,8 +151,8 @@ namespace nnforge cuda_linear_buffer_device_smart_ptr input_buf[2] = { - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), + 
cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), }; cuda_linear_buffer_device_smart_ptr input_converted_buf(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(float))); @@ -190,7 +186,7 @@ namespace nnforge output_errors = all_buffers.input_errors_buffer; } - cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * sizeof(unsigned char))); + cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * input_neuron_elem_size)); unsigned char * input = *input_host_buf; unsigned int current_data_slot = 0; @@ -209,21 +205,27 @@ namespace nnforge if (entries_available_for_processing_count > 0) { // Convert input + if (type_code == neuron_data_type::type_byte) { - std::pair convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( + int elem_count = (input_neuron_count * entries_available_for_processing_count + 3) / 4; + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( *cuda_config, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); - convert_compacted_to_raw_hess_kernel<<>>( + elem_count); + convert_compacted_to_raw_hess_kernel<<>>( *input_buf[current_command_slot], *input_converted_buf, - *scale_addition, - *scale_multiplication, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); + elem_count); + } + else if (type_code == neuron_data_type::type_float) + { + cuda_safe_call(cudaMemcpyAsync( + *input_converted_buf, + *input_buf[current_command_slot], + input_neuron_count * entries_available_for_processing_count * sizeof(float), + cudaMemcpyDeviceToDevice, + *command_stream)); } + else throw neural_network_exception((boost::format("actual_get_hessian cannot 
handle input neurons of type %1%") % type_code).str()); // Run ann { @@ -313,7 +315,7 @@ namespace nnforge unsigned int entries_to_read_count = std::min(max_entry_count, entries_available_for_copy_in_count); while(entries_read_count < entries_to_read_count) { - bool entry_read = reader.read(input + (input_neuron_count * entries_read_count), 0); + bool entry_read = reader.read(input + (input_neuron_count * entries_read_count * input_neuron_elem_size), 0); if (!entry_read) break; @@ -323,7 +325,7 @@ namespace nnforge cuda_safe_call(cudaMemcpyAsync( *(input_buf[current_data_slot]), input, - entries_read_count * input_neuron_count * sizeof(unsigned char), + entries_read_count * input_neuron_count * input_neuron_elem_size, cudaMemcpyHostToDevice, *data_stream)); } @@ -367,14 +369,6 @@ namespace nnforge *(it_conf + 1), (it_conf > layer_config_list.begin() + testing_layer_count))); } - - scale_multiplication = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( - &(*current_scale_params->multiplication_list.begin()), - current_scale_params->multiplication_list.size() * sizeof(float))); - - scale_addition = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( - &(*current_scale_params->addition_list.begin()), - current_scale_params->addition_list.size() * sizeof(float))); } std::vector > hessian_calculator_cuda::enqueue_get_data( @@ -500,9 +494,6 @@ namespace nnforge void hessian_calculator_cuda::update_buffers_configuration(buffer_cuda_size_configuration& buffer_configuration) const { - buffer_configuration.add_constant_buffer(scale_addition->get_size()); - buffer_configuration.add_constant_buffer(scale_multiplication->get_size()); - for(std::vector >::const_iterator it = testing_schema_data.begin(); it != testing_schema_data.end(); ++it) for(std::vector::const_iterator it2 = it->begin(); it2 != it->end(); ++it2) buffer_configuration.add_constant_buffer((*it2)->get_size()); diff --git a/nnforge/cuda/hessian_calculator_cuda.h 
b/nnforge/cuda/hessian_calculator_cuda.h index ef9188b..2df7900 100644 --- a/nnforge/cuda/hessian_calculator_cuda.h +++ b/nnforge/cuda/hessian_calculator_cuda.h @@ -32,7 +32,6 @@ namespace nnforge public: hessian_calculator_cuda( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, cuda_running_configuration_const_smart_ptr cuda_config); virtual ~hessian_calculator_cuda(); @@ -40,7 +39,7 @@ namespace nnforge protected: // schema, data and reader are guaranteed to be compatible virtual network_data_smart_ptr actual_get_hessian( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count); @@ -86,9 +85,6 @@ namespace nnforge unsigned int testing_layer_count; const_layer_list::const_iterator start_layer_nonempty_weights_iterator; - const_cuda_linear_buffer_device_smart_ptr scale_addition; - const_cuda_linear_buffer_device_smart_ptr scale_multiplication; - const_layer_testing_schema_list testing_schemas; std::vector > testing_schema_data; std::vector tester_list; diff --git a/nnforge/cuda/hessian_calculator_cuda_factory.cpp b/nnforge/cuda/hessian_calculator_cuda_factory.cpp index 96e738a..34a7545 100644 --- a/nnforge/cuda/hessian_calculator_cuda_factory.cpp +++ b/nnforge/cuda/hessian_calculator_cuda_factory.cpp @@ -31,11 +31,9 @@ namespace nnforge { } - hessian_calculator_smart_ptr hessian_calculator_cuda_factory::create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const + hessian_calculator_smart_ptr hessian_calculator_cuda_factory::create(network_schema_smart_ptr schema) const { - return hessian_calculator_smart_ptr(new hessian_calculator_cuda(schema, scale_params, cuda_config)); + return hessian_calculator_smart_ptr(new hessian_calculator_cuda(schema, cuda_config)); } } } diff --git a/nnforge/cuda/hessian_calculator_cuda_factory.h b/nnforge/cuda/hessian_calculator_cuda_factory.h index 53dcc89..cada0ae 100644 --- 
a/nnforge/cuda/hessian_calculator_cuda_factory.h +++ b/nnforge/cuda/hessian_calculator_cuda_factory.h @@ -30,9 +30,7 @@ namespace nnforge virtual ~hessian_calculator_cuda_factory(); - virtual hessian_calculator_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const; + virtual hessian_calculator_smart_ptr create(network_schema_smart_ptr schema) const; protected: cuda_running_configuration_const_smart_ptr cuda_config; diff --git a/nnforge/cuda/network_tester_cuda.cu b/nnforge/cuda/network_tester_cuda.cu index 8332995..ab7ec5e 100644 --- a/nnforge/cuda/network_tester_cuda.cu +++ b/nnforge/cuda/network_tester_cuda.cu @@ -27,24 +27,20 @@ #include __global__ void convert_compacted_to_raw_kernel( - const unsigned char * __restrict input, - float * __restrict output, - const float * __restrict scale_addition, - const float * __restrict scale_multiplication, - int elem_count_per_feature_map, - int feature_map_count, - int entry_count) + const uchar4 * __restrict input, + float4 * __restrict output, + int elem_count) { - int elem_id_inside_feature_map = blockIdx.x * blockDim.x + threadIdx.x; - int feature_map_id = blockIdx.y * blockDim.y + threadIdx.y; - int entry_id = blockIdx.z * blockDim.z + threadIdx.z; - bool in_bounds = (entry_id < entry_count) && (elem_id_inside_feature_map < elem_count_per_feature_map) && (feature_map_id < feature_map_count); - if (in_bounds) + int elem_id = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x; + if (elem_id < elem_count) { - int offset = elem_count_per_feature_map * (entry_id * feature_map_count + feature_map_id) + elem_id_inside_feature_map; - unsigned char val = input[offset]; - float converted_val = ((val * (1.0F / 255.0F)) + scale_addition[feature_map_id]) * scale_multiplication[feature_map_id]; - output[offset] = converted_val; + uchar4 inp = input[elem_id]; + float4 val; + val.x = inp.x * (1.0F / 255.0F); + val.y = inp.y * (1.0F / 255.0F); + val.z = inp.z * 
(1.0F / 255.0F); + val.w = inp.w * (1.0F / 255.0F); + output[elem_id] = val; } } @@ -54,9 +50,8 @@ namespace nnforge { network_tester_cuda::network_tester_cuda( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, cuda_running_configuration_const_smart_ptr cuda_config) - : network_tester(schema, scale_params) + : network_tester(schema) , cuda_config(cuda_config) { const const_layer_list& layer_list = *schema; @@ -118,14 +113,6 @@ namespace nnforge *it_conf, *(it_conf + 1))); } - - scale_multiplication = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( - &(*current_scale_params->multiplication_list.begin()), - current_scale_params->multiplication_list.size() * sizeof(float))); - - scale_addition = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( - &(*current_scale_params->addition_list.begin()), - current_scale_params->addition_list.size() * sizeof(float))); } void network_tester_cuda::update_buffers_configuration_testing(buffer_cuda_size_configuration& buffer_configuration) const @@ -133,8 +120,6 @@ namespace nnforge for(std::vector >::const_iterator it = net_data.begin(); it != net_data.end(); ++it) for(std::vector::const_iterator it2 = it->begin(); it2 != it->end(); ++it2) buffer_configuration.add_constant_buffer((*it2)->get_size()); - buffer_configuration.add_constant_buffer(scale_addition->get_size()); - buffer_configuration.add_constant_buffer(scale_multiplication->get_size()); for(std::vector >::const_iterator it = schema_data.begin(); it != schema_data.end(); ++it) for(std::vector::const_iterator it2 = it->begin(); it2 != it->end(); ++it2) @@ -145,7 +130,7 @@ namespace nnforge } void network_tester_cuda::actual_test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, testing_complete_result_set& result) { reader.reset(); @@ -156,17 +141,17 @@ namespace nnforge unsigned int input_neuron_count = input_configuration.get_neuron_count(); unsigned int 
input_neuron_count_per_feature_map = input_configuration.get_neuron_count_per_feature_map(); unsigned int output_neuron_count = output_configuration.get_neuron_count(); - unsigned int input_feature_map_count = input_configuration.feature_map_count; - unsigned int entry_count = reader.get_entry_count(); + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); result.mse = testing_result_smart_ptr(new testing_result(output_neuron_count)); buffer_cuda_size_configuration buffers_config; update_buffers_configuration_testing(buffers_config); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output @@ -175,8 +160,8 @@ namespace nnforge cuda_linear_buffer_device_smart_ptr input_buf[2] = { - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), }; cuda_linear_buffer_device_smart_ptr output_buf[2] = @@ -196,7 +181,7 @@ namespace nnforge output_buffer = 
(*it)->get_output_buffer(output_buffer, additional_buffers); } - cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * sizeof(unsigned char))); + cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * input_neuron_elem_size)); unsigned char * input = *input_host_buf; cuda_linear_buffer_host_smart_ptr output_predicted_host_buf(new cuda_linear_buffer_host(output_neuron_count * max_entry_count * sizeof(float))); float * output_predicted = *output_predicted_host_buf; @@ -220,21 +205,27 @@ namespace nnforge if (entries_available_for_processing_count > 0) { // Convert input + if (type_code == neuron_data_type::type_byte) { - std::pair convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( + int elem_count = (input_neuron_count * entries_available_for_processing_count + 3) / 4; + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( *cuda_config, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); - convert_compacted_to_raw_kernel<<>>( + elem_count); + convert_compacted_to_raw_kernel<<>>( *input_buf[current_command_slot], *input_converted_buf, - *scale_addition, - *scale_multiplication, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); + elem_count); } + else if (type_code == neuron_data_type::type_float) + { + cuda_safe_call(cudaMemcpyAsync( + *input_converted_buf, + *input_buf[current_command_slot], + input_neuron_count * entries_available_for_processing_count * sizeof(float), + cudaMemcpyDeviceToDevice, + *command_stream)); + } + else throw neural_network_exception((boost::format("actual_run cannot handle input neurons of type %1%") % type_code).str()); // Run ann { @@ -312,7 +303,7 @@ namespace nnforge while(entries_read_count < max_entry_count) { bool 
entry_read = reader.read( - input + (input_neuron_count * entries_read_count), + input + (input_neuron_count * entries_read_count * input_neuron_elem_size), &(*output_actual[current_data_slot].begin()) + (output_neuron_count * entries_read_count)); if (!entry_read) @@ -323,7 +314,7 @@ namespace nnforge cuda_safe_call(cudaMemcpyAsync( *(input_buf[current_data_slot]), input, - entries_read_count * input_neuron_count * sizeof(unsigned char), + entries_read_count * input_neuron_count * input_neuron_elem_size, cudaMemcpyHostToDevice, *data_stream)); } @@ -342,7 +333,7 @@ namespace nnforge result.mse->entry_count = entries_processed_count; } - output_neuron_value_set_smart_ptr network_tester_cuda::actual_run(unsupervised_data_reader_byte& reader) + output_neuron_value_set_smart_ptr network_tester_cuda::actual_run(unsupervised_data_reader& reader) { reader.reset(); @@ -350,18 +341,18 @@ namespace nnforge unsigned int input_neuron_count = layer_config_list.begin()->get_neuron_count(); unsigned int input_neuron_count_per_feature_map = layer_config_list.begin()->get_neuron_count_per_feature_map(); - unsigned int input_feature_map_count = layer_config_list.begin()->feature_map_count; unsigned int output_neuron_count = layer_config_list.end()->get_neuron_count(); - unsigned int entry_count = reader.get_entry_count(); + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); output_neuron_value_set_smart_ptr predicted_output_neuron_value_set(new output_neuron_value_set(entry_count, output_neuron_count)); buffer_cuda_size_configuration buffers_config; update_buffers_configuration_testing(buffers_config); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input + 
buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output @@ -370,8 +361,8 @@ namespace nnforge cuda_linear_buffer_device_smart_ptr input_buf[2] = { - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), }; cuda_linear_buffer_device_smart_ptr output_buf[2] = @@ -391,7 +382,7 @@ namespace nnforge output_buffer = (*it)->get_output_buffer(output_buffer, additional_buffers); } - cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * sizeof(unsigned char))); + cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * input_neuron_elem_size)); unsigned char * input = *input_host_buf; cuda_linear_buffer_host_smart_ptr output_predicted_host_buf(new cuda_linear_buffer_host(output_neuron_count * max_entry_count * sizeof(float))); float * output_predicted = *output_predicted_host_buf; @@ -410,21 +401,27 @@ namespace nnforge if (entries_available_for_processing_count > 0) { // Convert input + if (type_code == neuron_data_type::type_byte) { - std::pair convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( + int elem_count = (input_neuron_count * 
entries_available_for_processing_count + 3) / 4; + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( *cuda_config, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); - convert_compacted_to_raw_kernel<<>>( + elem_count); + convert_compacted_to_raw_kernel<<>>( *input_buf[current_command_slot], *input_converted_buf, - *scale_addition, - *scale_multiplication, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); + elem_count); } + else if (type_code == neuron_data_type::type_float) + { + cuda_safe_call(cudaMemcpyAsync( + *input_converted_buf, + *input_buf[current_command_slot], + input_neuron_count * entries_available_for_processing_count * sizeof(float), + cudaMemcpyDeviceToDevice, + *command_stream)); + } + else throw neural_network_exception((boost::format("actual_run cannot handle input neurons of type %1%") % type_code).str()); // Run ann { @@ -487,7 +484,7 @@ namespace nnforge { while(entries_read_count < max_entry_count) { - bool entry_read = reader.read(input + (input_neuron_count * entries_read_count)); + bool entry_read = reader.read(input + (input_neuron_count * entries_read_count * input_neuron_elem_size)); if (!entry_read) break; @@ -497,7 +494,7 @@ namespace nnforge cuda_safe_call(cudaMemcpyAsync( *(input_buf[current_data_slot]), input, - entries_read_count * input_neuron_count * sizeof(unsigned char), + entries_read_count * input_neuron_count * input_neuron_elem_size, cudaMemcpyHostToDevice, *data_stream)); } @@ -517,16 +514,18 @@ namespace nnforge return predicted_output_neuron_value_set; } - std::vector network_tester_cuda::actual_get_snapshot(std::vector& input) + std::vector network_tester_cuda::actual_get_snapshot( + const void * input, + neuron_data_type::input_type type_code) { std::vector res; unsigned int input_neuron_count = layer_config_list.begin()->get_neuron_count(); unsigned int 
input_neuron_count_per_feature_map = layer_config_list.begin()->get_neuron_count_per_feature_map(); - unsigned int input_feature_map_count = layer_config_list.begin()->feature_map_count; unsigned int output_neuron_count = (layer_config_list.end() - 1)->get_neuron_count(); + size_t input_neuron_elem_size = neuron_data_type::get_input_size(type_code); - cuda_linear_buffer_device_smart_ptr input_buf(new cuda_linear_buffer_device(input_neuron_count * sizeof(unsigned char))); + cuda_linear_buffer_device_smart_ptr input_buf(new cuda_linear_buffer_device(input_neuron_count * input_neuron_elem_size)); cuda_linear_buffer_device_smart_ptr input_converted_buf(new cuda_linear_buffer_device(input_neuron_count * sizeof(float))); cuda_linear_buffer_device_smart_ptr output_buffer = input_converted_buf; @@ -541,35 +540,37 @@ namespace nnforge output_buffer_list.push_back(output_buffer); } - // Copy inout + // Copy input { cuda_safe_call(cudaMemcpyAsync( *input_buf, - &(*input.begin()), - input_neuron_count * sizeof(unsigned char), + input, + input_neuron_count * input_neuron_elem_size, cudaMemcpyHostToDevice, *command_stream)); } // Convert input + if (type_code == neuron_data_type::type_byte) { - std::pair convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( + int elem_count = (input_neuron_count + 3) / 4; + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( *cuda_config, - input_neuron_count_per_feature_map, - input_feature_map_count, - 1); - convert_compacted_to_raw_kernel<<>>( + elem_count); + convert_compacted_to_raw_kernel<<>>( *input_buf, *input_converted_buf, - *scale_addition, - *scale_multiplication, - input_neuron_count_per_feature_map, - input_feature_map_count, - 1); + elem_count); + } + else if (type_code == neuron_data_type::type_float) + { + cuda_safe_call(cudaMemcpyAsync(*input_converted_buf, *input_buf, input_neuron_count * sizeof(float), cudaMemcpyDeviceToDevice, 
*command_stream)); + } + else throw neural_network_exception((boost::format("actual_get_snapshot cannot handle input neurons of type %1%") % type_code).str()); + { layer_configuration_specific_snapshot_smart_ptr input_elem(new layer_configuration_specific_snapshot(layer_config_list[0])); res.push_back(input_elem); - cuda_safe_call(cudaMemcpyAsync( &(*(input_elem->data.begin())), *output_buffer_list[0], @@ -615,16 +616,18 @@ namespace nnforge return res; } - layer_configuration_specific_snapshot_smart_ptr network_tester_cuda::actual_run(std::vector& input) + layer_configuration_specific_snapshot_smart_ptr network_tester_cuda::actual_run( + const void * input, + neuron_data_type::input_type type_code) { layer_configuration_specific_snapshot_smart_ptr res(new layer_configuration_specific_snapshot(layer_config_list[layer_config_list.size() - 1])); unsigned int input_neuron_count = layer_config_list.begin()->get_neuron_count(); unsigned int input_neuron_count_per_feature_map = layer_config_list.begin()->get_neuron_count_per_feature_map(); - unsigned int input_feature_map_count = layer_config_list.begin()->feature_map_count; unsigned int output_neuron_count = (layer_config_list.end() - 1)->get_neuron_count(); + size_t input_neuron_elem_size = neuron_data_type::get_input_size(type_code); - cuda_linear_buffer_device_smart_ptr input_buf(new cuda_linear_buffer_device(input_neuron_count * sizeof(unsigned char))); + cuda_linear_buffer_device_smart_ptr input_buf(new cuda_linear_buffer_device(input_neuron_count * input_neuron_elem_size)); cuda_linear_buffer_device_smart_ptr input_converted_buf(new cuda_linear_buffer_device(input_neuron_count * sizeof(float))); cuda_linear_buffer_device_smart_ptr output_buffer = input_converted_buf; @@ -636,32 +639,33 @@ namespace nnforge output_buffer = (*it)->get_output_buffer(output_buffer, additional_buffers); } - // Copy inout + // Copy input { cuda_safe_call(cudaMemcpyAsync( *input_buf, - &(*input.begin()), - input_neuron_count * 
sizeof(unsigned char), + input, + input_neuron_count * input_neuron_elem_size, cudaMemcpyHostToDevice, *command_stream)); } // Convert input + if (type_code == neuron_data_type::type_byte) { - std::pair convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( + int elem_count = (input_neuron_count + 3) / 4; + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( *cuda_config, - input_neuron_count_per_feature_map, - input_feature_map_count, - 1); - convert_compacted_to_raw_kernel<<>>( + elem_count); + convert_compacted_to_raw_kernel<<>>( *input_buf, *input_converted_buf, - *scale_addition, - *scale_multiplication, - input_neuron_count_per_feature_map, - input_feature_map_count, - 1); + elem_count); + } + else if (type_code == neuron_data_type::type_float) + { + cuda_safe_call(cudaMemcpyAsync(*input_converted_buf, *input_buf, input_neuron_count * sizeof(float), cudaMemcpyDeviceToDevice, *command_stream)); } + else throw neural_network_exception((boost::format("actual_run cannot handle input neurons of type %1%") % type_code).str()); // Run ann { diff --git a/nnforge/cuda/network_tester_cuda.h b/nnforge/cuda/network_tester_cuda.h index c2fb75a..d73253b 100644 --- a/nnforge/cuda/network_tester_cuda.h +++ b/nnforge/cuda/network_tester_cuda.h @@ -36,7 +36,6 @@ namespace nnforge public: network_tester_cuda( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, cuda_running_configuration_const_smart_ptr cuda_config); virtual ~network_tester_cuda(); @@ -44,20 +43,24 @@ namespace nnforge protected: // schema, data and reader are guaranteed to be compatible virtual void actual_test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, testing_complete_result_set& result); // schema, data and reader are guaranteed to be compatible - virtual output_neuron_value_set_smart_ptr actual_run(unsupervised_data_reader_byte& reader); + virtual 
output_neuron_value_set_smart_ptr actual_run(unsupervised_data_reader& reader); // The method is called when client calls set_data. The data is guaranteed to be compatible with schema virtual void actual_set_data(network_data_smart_ptr data); // The method is called when client calls get_snapshot. The data is guaranteed to be compatible with schema - virtual std::vector actual_get_snapshot(std::vector& input); + virtual std::vector actual_get_snapshot( + const void * input, + neuron_data_type::input_type type_code); // The method is called when client calls get_snapshot. The data is guaranteed to be compatible with schema - virtual layer_configuration_specific_snapshot_smart_ptr actual_run(std::vector& input); + virtual layer_configuration_specific_snapshot_smart_ptr actual_run( + const void * input, + neuron_data_type::input_type type_code); // The method is called when client calls set_input_configuration_specific and the convolution specific configuration is modified. // The layer_config_list is guaranteed to be compatible with schema @@ -80,8 +83,6 @@ namespace nnforge std::vector > net_data; std::vector > schema_data; - const_cuda_linear_buffer_device_smart_ptr scale_addition; - const_cuda_linear_buffer_device_smart_ptr scale_multiplication; std::vector tester_list; }; diff --git a/nnforge/cuda/network_tester_cuda_factory.cpp b/nnforge/cuda/network_tester_cuda_factory.cpp index 96360ab..1fe9410 100644 --- a/nnforge/cuda/network_tester_cuda_factory.cpp +++ b/nnforge/cuda/network_tester_cuda_factory.cpp @@ -31,11 +31,9 @@ namespace nnforge { } - network_tester_smart_ptr network_tester_cuda_factory::create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const + network_tester_smart_ptr network_tester_cuda_factory::create(network_schema_smart_ptr schema) const { - return network_tester_smart_ptr(new network_tester_cuda(schema, scale_params, cuda_config)); + return network_tester_smart_ptr(new network_tester_cuda(schema, 
cuda_config)); } } } diff --git a/nnforge/cuda/network_tester_cuda_factory.h b/nnforge/cuda/network_tester_cuda_factory.h index d3e6890..559ca09 100644 --- a/nnforge/cuda/network_tester_cuda_factory.h +++ b/nnforge/cuda/network_tester_cuda_factory.h @@ -30,9 +30,7 @@ namespace nnforge virtual ~network_tester_cuda_factory(); - virtual network_tester_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const; + virtual network_tester_smart_ptr create(network_schema_smart_ptr schema) const; protected: cuda_running_configuration_const_smart_ptr cuda_config; diff --git a/nnforge/cuda/network_updater_cuda.cu b/nnforge/cuda/network_updater_cuda.cu index cd4943f..aacb5c1 100644 --- a/nnforge/cuda/network_updater_cuda.cu +++ b/nnforge/cuda/network_updater_cuda.cu @@ -29,24 +29,20 @@ #include __global__ void convert_compacted_to_raw_upd_kernel( - const unsigned char * __restrict input, - float * __restrict output, - const float * __restrict scale_addition, - const float * __restrict scale_multiplication, - int elem_count_per_feature_map, - int feature_map_count, - int entry_count) + const uchar4 * __restrict input, + float4 * __restrict output, + int elem_count) { - int elem_id_inside_feature_map = blockIdx.x * blockDim.x + threadIdx.x; - int feature_map_id = blockIdx.y * blockDim.y + threadIdx.y; - int entry_id = blockIdx.z * blockDim.z + threadIdx.z; - bool in_bounds = (entry_id < entry_count) && (elem_id_inside_feature_map < elem_count_per_feature_map) && (feature_map_id < feature_map_count); - if (in_bounds) + int elem_id = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x; + if (elem_id < elem_count) { - int offset = elem_count_per_feature_map * (entry_id * feature_map_count + feature_map_id) + elem_id_inside_feature_map; - unsigned char val = input[offset]; - float converted_val = ((val * (1.0F / 255.0F)) + scale_addition[feature_map_id]) * scale_multiplication[feature_map_id]; - output[offset] = 
converted_val; + uchar4 inp = input[elem_id]; + float4 val; + val.x = inp.x * (1.0F / 255.0F); + val.y = inp.y * (1.0F / 255.0F); + val.z = inp.z * (1.0F / 255.0F); + val.w = inp.w * (1.0F / 255.0F); + output[elem_id] = val; } } @@ -79,9 +75,8 @@ namespace nnforge network_updater_cuda::network_updater_cuda( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, cuda_running_configuration_const_smart_ptr cuda_config) - : network_updater(schema, scale_params) + : network_updater(schema) , cuda_config(cuda_config) { const const_layer_list& layer_list = *schema; @@ -124,7 +119,7 @@ namespace nnforge } std::vector network_updater_cuda::actual_update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, @@ -145,7 +140,8 @@ namespace nnforge unsigned int input_neuron_count = input_configuration.get_neuron_count(); unsigned int output_neuron_count = output_configuration.get_neuron_count(); unsigned int input_neuron_count_per_feature_map = input_configuration.get_neuron_count_per_feature_map(); - unsigned int input_feature_map_count = input_configuration.feature_map_count; + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); unsigned int updater_entry_count = static_cast(data_list.size()); if (updater_entry_count == 0) @@ -160,8 +156,8 @@ namespace nnforge buffer_cuda_size_configuration buffers_config; update_buffers_configuration(buffers_config, updater_entry_count); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input 
buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output @@ -182,8 +178,8 @@ namespace nnforge cuda_linear_buffer_device_smart_ptr input_buf[2] = { - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), - cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), + cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)), }; cuda_linear_buffer_device_smart_ptr output_buf[2] = @@ -232,7 +228,7 @@ namespace nnforge output_errors = all_buffers.input_errors_buffer; } - cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * sizeof(unsigned char))); + cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * input_neuron_elem_size)); unsigned char * input = *input_host_buf; cuda_linear_buffer_host_smart_ptr output_host_buf(new cuda_linear_buffer_host(output_neuron_count * max_entry_count * sizeof(float))); float * output = *output_host_buf; @@ -265,21 +261,27 @@ namespace nnforge if (entries_available_for_processing_count > 0) { // Convert input + if (type_code == neuron_data_type::type_byte) { - std::pair convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( + int elem_count = (input_neuron_count * entries_available_for_processing_count + 3) / 4; + std::pair kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access( *cuda_config, - 
input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); - convert_compacted_to_raw_upd_kernel<<>>( + elem_count); + convert_compacted_to_raw_upd_kernel<<>>( *input_buf[current_command_slot], *input_converted_buf, - *scale_addition, - *scale_multiplication, - input_neuron_count_per_feature_map, - input_feature_map_count, - entries_available_for_processing_count); + elem_count); + } + else if (type_code == neuron_data_type::type_float) + { + cuda_safe_call(cudaMemcpyAsync( + *input_converted_buf, + *input_buf[current_command_slot], + input_neuron_count * entries_available_for_processing_count * sizeof(float), + cudaMemcpyDeviceToDevice, + *command_stream)); } + else throw neural_network_exception((boost::format("actual_update cannot handle input neurons of type %1%") % type_code).str()); // Run ann { @@ -435,7 +437,7 @@ namespace nnforge while(entries_read_count < entries_to_read_count) { bool entry_read = reader.read( - input + (input_neuron_count * entries_read_count), + input + (input_neuron_count * entries_read_count * input_neuron_elem_size), output + (output_neuron_count * entries_read_count)); if (!entry_read) @@ -446,7 +448,7 @@ namespace nnforge cuda_safe_call(cudaMemcpyAsync( *(input_buf[current_data_slot]), input, - entries_read_count * input_neuron_count * sizeof(unsigned char), + entries_read_count * input_neuron_count * input_neuron_elem_size, cudaMemcpyHostToDevice, *data_stream)); cuda_safe_call(cudaMemcpyAsync( @@ -505,14 +507,6 @@ namespace nnforge (it_conf > layer_config_list.begin() + testing_layer_count), (it_conf > layer_config_list.begin() + testing_layer_count))); } - - scale_multiplication = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( - &(*current_scale_params->multiplication_list.begin()), - current_scale_params->multiplication_list.size() * sizeof(float))); - - scale_addition = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device( - 
&(*current_scale_params->addition_list.begin()), - current_scale_params->addition_list.size() * sizeof(float))); } std::vector > network_updater_cuda::enqueue_get_training_speed( @@ -617,9 +611,6 @@ namespace nnforge buffer_cuda_size_configuration& buffer_configuration, unsigned int updater_entry_count) const { - buffer_configuration.add_constant_buffer(scale_addition->get_size()); - buffer_configuration.add_constant_buffer(scale_multiplication->get_size()); - for(std::vector >::const_iterator it = testing_schema_data.begin(); it != testing_schema_data.end(); ++it) for(std::vector::const_iterator it2 = it->begin(); it2 != it->end(); ++it2) buffer_configuration.add_constant_buffer((*it2)->get_size()); diff --git a/nnforge/cuda/network_updater_cuda.h b/nnforge/cuda/network_updater_cuda.h index 578950d..7b0f3a2 100644 --- a/nnforge/cuda/network_updater_cuda.h +++ b/nnforge/cuda/network_updater_cuda.h @@ -32,7 +32,6 @@ namespace nnforge public: network_updater_cuda( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, cuda_running_configuration_const_smart_ptr cuda_config); virtual ~network_updater_cuda(); @@ -42,7 +41,7 @@ namespace nnforge protected: // schema, data and reader are guaranteed to be compatible virtual std::vector actual_update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, @@ -83,9 +82,6 @@ namespace nnforge unsigned int testing_layer_count; const_layer_list::const_iterator start_layer_nonempty_weights_iterator; - const_cuda_linear_buffer_device_smart_ptr scale_addition; - const_cuda_linear_buffer_device_smart_ptr scale_multiplication; - const_layer_testing_schema_list testing_schemas; std::vector > testing_schema_data; std::vector tester_list; diff --git a/nnforge/cuda/network_updater_cuda_factory.cpp b/nnforge/cuda/network_updater_cuda_factory.cpp index 14c1753..404bd0d 100644 --- 
a/nnforge/cuda/network_updater_cuda_factory.cpp +++ b/nnforge/cuda/network_updater_cuda_factory.cpp @@ -31,11 +31,9 @@ namespace nnforge { } - network_updater_smart_ptr network_updater_cuda_factory::create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const + network_updater_smart_ptr network_updater_cuda_factory::create(network_schema_smart_ptr schema) const { - return network_updater_smart_ptr(new network_updater_cuda(schema, scale_params, cuda_config)); + return network_updater_smart_ptr(new network_updater_cuda(schema, cuda_config)); } } } diff --git a/nnforge/cuda/network_updater_cuda_factory.h b/nnforge/cuda/network_updater_cuda_factory.h index db2ccef..c17d8a4 100644 --- a/nnforge/cuda/network_updater_cuda_factory.h +++ b/nnforge/cuda/network_updater_cuda_factory.h @@ -30,9 +30,7 @@ namespace nnforge virtual ~network_updater_cuda_factory(); - virtual network_updater_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const; + virtual network_updater_smart_ptr create(network_schema_smart_ptr schema) const; protected: cuda_running_configuration_const_smart_ptr cuda_config; diff --git a/nnforge/cuda/neural_network_cuda_exception.cpp b/nnforge/cuda/neural_network_cuda_exception.cpp index e9fc71d..2c2294b 100644 --- a/nnforge/cuda/neural_network_cuda_exception.cpp +++ b/nnforge/cuda/neural_network_cuda_exception.cpp @@ -15,13 +15,14 @@ */ #include "neural_network_cuda_exception.h" +#include namespace nnforge { namespace cuda { neural_network_cuda_exception::neural_network_cuda_exception(cudaError_t error_code) - : neural_network_exception(cudaGetErrorString(error_code)) + : neural_network_exception((boost::format("CUDA error: %1%") % cudaGetErrorString(error_code)).str()) { } } diff --git a/nnforge/data_scale_params.cpp b/nnforge/data_scale_params.cpp deleted file mode 100644 index 6997564..0000000 --- a/nnforge/data_scale_params.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * 
Copyright 2011-2013 Maxim Milakov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "data_scale_params.h" - -namespace nnforge -{ - data_scale_params::data_scale_params() - : feature_map_count(0) - { - } - - data_scale_params::data_scale_params(unsigned int feature_map_count) - : feature_map_count(feature_map_count) - , addition_list(feature_map_count, 0.0F) - , multiplication_list(feature_map_count, 1.0F) - { - } - - void data_scale_params::write(std::ostream& output_stream) const - { - output_stream.write(reinterpret_cast(&feature_map_count), sizeof(feature_map_count)); - - output_stream.write(reinterpret_cast(&(*addition_list.begin())), sizeof(*addition_list.begin()) * feature_map_count); - output_stream.write(reinterpret_cast(&(*multiplication_list.begin())), sizeof(*multiplication_list.begin()) * feature_map_count); - } - - void data_scale_params::read(std::istream& input_stream) - { - input_stream.read(reinterpret_cast(&feature_map_count), sizeof(feature_map_count)); - - addition_list.resize(feature_map_count); - multiplication_list.resize(feature_map_count); - - input_stream.read(reinterpret_cast(&(*addition_list.begin())), sizeof(*addition_list.begin()) * feature_map_count); - input_stream.read(reinterpret_cast(&(*multiplication_list.begin())), sizeof(*multiplication_list.begin()) * feature_map_count); - } -} diff --git a/nnforge/hessian_calculator.cpp b/nnforge/hessian_calculator.cpp index c2d5b91..69bc7fa 100644 --- 
a/nnforge/hessian_calculator.cpp +++ b/nnforge/hessian_calculator.cpp @@ -21,11 +21,8 @@ namespace nnforge { - hessian_calculator::hessian_calculator( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) + hessian_calculator::hessian_calculator(network_schema_smart_ptr schema) : schema(schema) - , scale_params(scale_params) { } @@ -38,16 +35,6 @@ namespace nnforge if ((layer_config_list.size() > 0) && (layer_config_list[0] == input_configuration_specific)) return; - if (scale_params == 0) - current_scale_params = const_data_scale_params_smart_ptr(new data_scale_params(input_configuration_specific.feature_map_count)); - else - { - current_scale_params = scale_params; - if (current_scale_params->feature_map_count != input_configuration_specific.feature_map_count) - throw neural_network_exception((boost::format("Feature map counts for scaling and in input data don't match: %1% and %2%") - % current_scale_params->feature_map_count % input_configuration_specific.feature_map_count).str()); - } - layer_config_list = schema->get_layer_configuration_specific_list(input_configuration_specific); update_flops(); @@ -56,7 +43,7 @@ namespace nnforge } network_data_smart_ptr hessian_calculator::get_hessian( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count) { diff --git a/nnforge/hessian_calculator.h b/nnforge/hessian_calculator.h index a2bf546..d8357ac 100644 --- a/nnforge/hessian_calculator.h +++ b/nnforge/hessian_calculator.h @@ -20,7 +20,6 @@ #include "network_data.h" #include "layer_configuration_specific.h" #include "supervised_data_reader.h" -#include "data_scale_params.h" #include @@ -31,11 +30,11 @@ namespace nnforge public: virtual ~hessian_calculator(); - // You don't need to call this method before calling get_hessian with supervised_data_reader_byte + // You don't need to call this method before calling get_hessian with 
supervised_data_reader void set_input_configuration_specific(const layer_configuration_specific& input_configuration_specific); network_data_smart_ptr get_hessian( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count); @@ -43,13 +42,11 @@ namespace nnforge float get_flops_for_single_entry() const; protected: - hessian_calculator( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params); + hessian_calculator(network_schema_smart_ptr schema); // schema, data and reader are guaranteed to be compatible virtual network_data_smart_ptr actual_get_hessian( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count) = 0; @@ -62,15 +59,12 @@ namespace nnforge protected: network_schema_smart_ptr schema; layer_configuration_specific_list layer_config_list; - const_data_scale_params_smart_ptr current_scale_params; // Defined in set_input_configuration_specific float flops; private: hessian_calculator(); hessian_calculator(const hessian_calculator&); hessian_calculator& operator =(const hessian_calculator&); - - const_data_scale_params_smart_ptr scale_params; }; typedef std::tr1::shared_ptr hessian_calculator_smart_ptr; diff --git a/nnforge/hessian_calculator_factory.h b/nnforge/hessian_calculator_factory.h index da9954d..e5dfa1c 100644 --- a/nnforge/hessian_calculator_factory.h +++ b/nnforge/hessian_calculator_factory.h @@ -26,9 +26,7 @@ namespace nnforge public: virtual ~hessian_calculator_factory(); - virtual hessian_calculator_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const = 0; + virtual hessian_calculator_smart_ptr create(network_schema_smart_ptr schema) const = 0; protected: hessian_calculator_factory(); diff --git a/nnforge/network_tester.cpp b/nnforge/network_tester.cpp index 5f36667..8ed8bd5 100644 --- 
a/nnforge/network_tester.cpp +++ b/nnforge/network_tester.cpp @@ -22,11 +22,8 @@ namespace nnforge { - network_tester::network_tester( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) + network_tester::network_tester(network_schema_smart_ptr schema) : schema(schema) - , scale_params(scale_params) { } @@ -47,16 +44,6 @@ namespace nnforge if ((layer_config_list.size() > 0) && (layer_config_list[0] == input_configuration_specific)) return; - if (scale_params == 0) - current_scale_params = const_data_scale_params_smart_ptr(new data_scale_params(input_configuration_specific.feature_map_count)); - else - { - current_scale_params = scale_params; - if (current_scale_params->feature_map_count != input_configuration_specific.feature_map_count) - throw neural_network_exception((boost::format("Feature map counts for scaling and in input data don't match: %1% and %2%") - % current_scale_params->feature_map_count % input_configuration_specific.feature_map_count).str()); - } - layer_config_list = schema->get_layer_configuration_specific_list(input_configuration_specific); update_flops(); @@ -65,7 +52,7 @@ namespace nnforge } void network_tester::test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, testing_complete_result_set& result) { boost::chrono::steady_clock::time_point start = boost::chrono::high_resolution_clock::now(); @@ -83,27 +70,65 @@ namespace nnforge result.mse->time_to_complete_seconds = sec.count(); } - output_neuron_value_set_smart_ptr network_tester::run(unsupervised_data_reader_byte& reader) + output_neuron_value_set_smart_ptr network_tester::run(unsupervised_data_reader& reader) { set_input_configuration_specific(reader.get_input_configuration()); return actual_run(reader); } - std::vector network_tester::get_snapshot(std::vector& input) + std::vector network_tester::get_snapshot( + const void * input, + neuron_data_type::input_type type_code, + unsigned int input_neuron_count) + { + // Check 
schema-reader consistency + layer_config_list[0].check_equality(static_cast(input_neuron_count)); + + return actual_get_snapshot(input, type_code); + } + + std::vector network_tester::get_snapshot(const std::vector& input) + { + // Check schema-reader consistency + layer_config_list[0].check_equality(static_cast(input.size())); + + return actual_get_snapshot(&(*input.begin()), neuron_data_type::type_byte); + } + + std::vector network_tester::get_snapshot(const std::vector& input) + { + // Check schema-reader consistency + layer_config_list[0].check_equality(static_cast(input.size())); + + return actual_get_snapshot(&(*input.begin()), neuron_data_type::type_float); + } + + layer_configuration_specific_snapshot_smart_ptr network_tester::run( + const void * input, + neuron_data_type::input_type type_code, + unsigned int input_neuron_count) + { + // Check schema-reader consistency + layer_config_list[0].check_equality(static_cast(input_neuron_count)); + + return actual_run(input, type_code); + } + + layer_configuration_specific_snapshot_smart_ptr network_tester::run(const std::vector& input) { // Check schema-reader consistency layer_config_list[0].check_equality(static_cast(input.size())); - return actual_get_snapshot(input); + return actual_run(&(*input.begin()), neuron_data_type::type_byte); } - layer_configuration_specific_snapshot_smart_ptr network_tester::run(std::vector& input) + layer_configuration_specific_snapshot_smart_ptr network_tester::run(const std::vector& input) { // Check schema-reader consistency layer_config_list[0].check_equality(static_cast(input.size())); - return actual_run(input); + return actual_run(&(*input.begin()), neuron_data_type::type_float); } void network_tester::update_flops() diff --git a/nnforge/network_tester.h b/nnforge/network_tester.h index 1486f66..7cb2c15 100644 --- a/nnforge/network_tester.h +++ b/nnforge/network_tester.h @@ -23,7 +23,7 @@ #include "testing_complete_result_set.h" #include "layer_configuration_specific.h" 
#include "layer_configuration_specific_snapshot.h" -#include "data_scale_params.h" +#include "neuron_data_type.h" #include #include @@ -37,45 +37,65 @@ namespace nnforge void set_data(network_data_smart_ptr data); - // You don't need to call this method before calling test with supervised_data_reader_byte + // You don't need to call this method before calling test with supervised_data_reader void set_input_configuration_specific(const layer_configuration_specific& input_configuration_specific); void test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, testing_complete_result_set& result); - output_neuron_value_set_smart_ptr run(unsupervised_data_reader_byte& reader); + output_neuron_value_set_smart_ptr run(unsupervised_data_reader& reader); // You need to call set_input_configuration_specific before you call this method for the 1st time - std::vector get_snapshot(std::vector& input); + std::vector get_snapshot( + const void * input, + neuron_data_type::input_type type_code, + unsigned int input_neuron_count); // You need to call set_input_configuration_specific before you call this method for the 1st time - layer_configuration_specific_snapshot_smart_ptr run(std::vector& input); + std::vector get_snapshot(const std::vector& input); + + // You need to call set_input_configuration_specific before you call this method for the 1st time + std::vector get_snapshot(const std::vector& input); + + // You need to call set_input_configuration_specific before you call this method for the 1st time + layer_configuration_specific_snapshot_smart_ptr run( + const void * input, + neuron_data_type::input_type type_code, + unsigned int input_neuron_count); + + // You need to call set_input_configuration_specific before you call this method for the 1st time + layer_configuration_specific_snapshot_smart_ptr run(const std::vector& input); + + // You need to call set_input_configuration_specific before you call this method for the 1st time + 
layer_configuration_specific_snapshot_smart_ptr run(const std::vector& input); // set_input_configuration_specific should be called prior to this method call for this method to succeed float get_flops_for_single_entry() const; protected: - network_tester( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params); + network_tester(network_schema_smart_ptr schema); // schema, data and reader are guaranteed to be compatible virtual void actual_test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, testing_complete_result_set& result) = 0; // schema, data and reader are guaranteed to be compatible - virtual output_neuron_value_set_smart_ptr actual_run(unsupervised_data_reader_byte& reader) = 0; + virtual output_neuron_value_set_smart_ptr actual_run(unsupervised_data_reader& reader) = 0; // The method is called when client calls set_data. The data is guaranteed to be compatible with schema virtual void actual_set_data(network_data_smart_ptr data) = 0; // The method is called when client calls get_snapshot. The data is guaranteed to be compatible with schema - virtual std::vector actual_get_snapshot(std::vector& input) = 0; + virtual std::vector actual_get_snapshot( + const void * input, + neuron_data_type::input_type type_code) = 0; // The method is called when client calls get_snapshot. The data is guaranteed to be compatible with schema - virtual layer_configuration_specific_snapshot_smart_ptr actual_run(std::vector& input) = 0; + virtual layer_configuration_specific_snapshot_smart_ptr actual_run( + const void * input, + neuron_data_type::input_type type_code) = 0; // The method is called when client calls set_input_configuration_specific and the convolution specific configuration is modified. 
// The layer_config_list is guaranteed to be compatible with schema @@ -85,7 +105,6 @@ namespace nnforge protected: network_schema_smart_ptr schema; - const_data_scale_params_smart_ptr current_scale_params; // Defined in set_input_configuration_specific layer_configuration_specific_list layer_config_list; float flops; @@ -93,8 +112,6 @@ namespace nnforge network_tester(); network_tester(const network_tester&); network_tester& operator =(const network_tester&); - - const_data_scale_params_smart_ptr scale_params; }; typedef std::tr1::shared_ptr network_tester_smart_ptr; diff --git a/nnforge/network_tester_factory.h b/nnforge/network_tester_factory.h index 5598d63..c9f1e03 100644 --- a/nnforge/network_tester_factory.h +++ b/nnforge/network_tester_factory.h @@ -26,9 +26,7 @@ namespace nnforge public: virtual ~network_tester_factory(); - virtual network_tester_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const = 0; + virtual network_tester_smart_ptr create(network_schema_smart_ptr schema) const = 0; protected: network_tester_factory(); diff --git a/nnforge/network_trainer.cpp b/nnforge/network_trainer.cpp index 636170a..8dce667 100644 --- a/nnforge/network_trainer.cpp +++ b/nnforge/network_trainer.cpp @@ -36,7 +36,7 @@ namespace nnforge } void network_trainer::train( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_peeker& peeker, network_data_pusher& progress_pusher, network_data_pusher& pusher, diff --git a/nnforge/network_trainer.h b/nnforge/network_trainer.h index bf13dac..18323a8 100644 --- a/nnforge/network_trainer.h +++ b/nnforge/network_trainer.h @@ -35,7 +35,7 @@ namespace nnforge // If the layer is not in layer_to_dropout_rate_map then its droput rate is assumed to be 0.0F void train( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_peeker& peeker, network_data_pusher& progress_pusher, network_data_pusher& pusher, @@ -46,14 +46,14 
@@ namespace nnforge protected: network_trainer(network_schema_smart_ptr schema); - virtual void initialize_train(supervised_data_reader_byte& reader) = 0; + virtual void initialize_train(supervised_data_reader& reader) = 0; virtual unsigned int get_max_batch_size() const = 0; // The method should add testing result to the training history of each element // Size of random_uniform_list is a power of 2 virtual void train_step( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, std::vector& task_list, const std::map& layer_to_dropout_rate_map, const std::vector& random_uniform_list) = 0; diff --git a/nnforge/network_trainer_sdlm.cpp b/nnforge/network_trainer_sdlm.cpp index 2b8f704..7df41a7 100644 --- a/nnforge/network_trainer_sdlm.cpp +++ b/nnforge/network_trainer_sdlm.cpp @@ -23,7 +23,7 @@ namespace nnforge { - const unsigned int network_trainer_sdlm::min_hessian_entry_to_process_count = 50; + const unsigned int network_trainer_sdlm::min_hessian_entry_to_process_count = 10; network_trainer_sdlm::network_trainer_sdlm( network_schema_smart_ptr schema, @@ -45,7 +45,7 @@ namespace nnforge } void network_trainer_sdlm::train_step( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, std::vector& task_list, const std::map& layer_to_dropout_rate_map, const std::vector& random_uniform_list) @@ -229,7 +229,7 @@ namespace nnforge return updater->get_max_batch_size(); } - void network_trainer_sdlm::initialize_train(supervised_data_reader_byte& reader) + void network_trainer_sdlm::initialize_train(supervised_data_reader& reader) { updater->set_input_configuration_specific(reader.get_input_configuration()); } diff --git a/nnforge/network_trainer_sdlm.h b/nnforge/network_trainer_sdlm.h index bbbb2d2..3a0c01c 100644 --- a/nnforge/network_trainer_sdlm.h +++ b/nnforge/network_trainer_sdlm.h @@ -47,12 +47,12 @@ namespace nnforge protected: // The method should add testing result to the training history of each element virtual void train_step( 
- supervised_data_reader_byte& reader, + supervised_data_reader& reader, std::vector& task_list, const std::map& layer_to_dropout_rate_map, const std::vector& random_uniform_list); - virtual void initialize_train(supervised_data_reader_byte& reader); + virtual void initialize_train(supervised_data_reader& reader); virtual unsigned int get_max_batch_size() const; diff --git a/nnforge/network_updater.cpp b/nnforge/network_updater.cpp index a3d22fc..e84c252 100644 --- a/nnforge/network_updater.cpp +++ b/nnforge/network_updater.cpp @@ -21,12 +21,9 @@ namespace nnforge { - network_updater::network_updater( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) + network_updater::network_updater(network_schema_smart_ptr schema) : schema(schema) , profile_mode(false) - , scale_params(scale_params) { } @@ -39,16 +36,6 @@ namespace nnforge if ((layer_config_list.size() > 0) && (layer_config_list[0] == input_configuration_specific)) return; - if (scale_params == 0) - current_scale_params = const_data_scale_params_smart_ptr(new data_scale_params(input_configuration_specific.feature_map_count)); - else - { - current_scale_params = scale_params; - if (current_scale_params->feature_map_count != input_configuration_specific.feature_map_count) - throw neural_network_exception((boost::format("Feature map counts for scaling and in input data don't match: %1% and %2%") - % current_scale_params->feature_map_count % input_configuration_specific.feature_map_count).str()); - } - layer_config_list = schema->get_layer_configuration_specific_list(input_configuration_specific); update_flops(); @@ -57,7 +44,7 @@ namespace nnforge } std::vector network_updater::update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, diff --git a/nnforge/network_updater.h b/nnforge/network_updater.h index 5a1b651..bcdb635 100644 --- 
a/nnforge/network_updater.h +++ b/nnforge/network_updater.h @@ -21,7 +21,6 @@ #include "layer_configuration_specific.h" #include "supervised_data_reader.h" #include "testing_result.h" -#include "data_scale_params.h" #include @@ -32,12 +31,12 @@ namespace nnforge public: virtual ~network_updater(); - // You don't need to call this method before calling get_hessian with supervised_data_reader_byte + // You don't need to call this method before calling get_hessian with supervised_data_reader void set_input_configuration_specific(const layer_configuration_specific& input_configuration_specific); // Size of random_uniform_list is a power of 2 std::vector update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, @@ -53,13 +52,11 @@ namespace nnforge unsigned int entry_count_updated_in_profile_mode; protected: - network_updater( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params); + network_updater(network_schema_smart_ptr schema); // schema, data and reader are guaranteed to be compatible virtual std::vector actual_update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, @@ -74,15 +71,12 @@ namespace nnforge protected: network_schema_smart_ptr schema; layer_configuration_specific_list layer_config_list; - const_data_scale_params_smart_ptr current_scale_params; // Defined in set_input_configuration_specific float flops; private: network_updater(); network_updater(const network_updater&); network_updater& operator =(const network_updater&); - - const_data_scale_params_smart_ptr scale_params; }; typedef std::tr1::shared_ptr network_updater_smart_ptr; diff --git a/nnforge/network_updater_factory.h b/nnforge/network_updater_factory.h index 345d1ff..b3a6635 100644 --- 
a/nnforge/network_updater_factory.h +++ b/nnforge/network_updater_factory.h @@ -26,9 +26,7 @@ namespace nnforge public: virtual ~network_updater_factory(); - virtual network_updater_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const = 0; + virtual network_updater_smart_ptr create(network_schema_smart_ptr schema) const = 0; protected: network_updater_factory(); diff --git a/nnforge/neural_network_toolset.cpp b/nnforge/neural_network_toolset.cpp index 41cae25..ede59b2 100644 --- a/nnforge/neural_network_toolset.cpp +++ b/nnforge/neural_network_toolset.cpp @@ -93,10 +93,6 @@ namespace nnforge { randomize_data(); } - else if (!action.compare("normalize_data")) - { - normalize_data(); - } else if (!action.compare("validate")) { validate(true, false); @@ -163,7 +159,7 @@ namespace nnforge boost::program_options::options_description gener("Generic options"); gener.add_options() ("help", "produce help message") - ("action,A", boost::program_options::value(&action), "run action (info, create, prepare_data, normalize_data, randomize_data, test, test_batch, validate, validate_batch, validate_infinite, train, train_batch, snapshot, snapshot_invalid, profile_updater, profile_hessian)") + ("action,A", boost::program_options::value(&action), "run action (info, create, prepare_data, randomize_data, test, test_batch, validate, validate_batch, validate_infinite, train, train_batch, snapshot, snapshot_invalid, profile_updater, profile_hessian)") ("config,C", boost::program_options::value(&config_file)->default_value(default_config_path), "path to the configuration file.") ; @@ -374,66 +370,12 @@ namespace nnforge return dst; } - void neural_network_toolset::normalize_data() - { - if (!is_normalize_input()) - throw std::runtime_error("This configuration doesn't assume input data to be normalized"); - - std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / training_data_filename, std::ios_base::in | 
std::ios_base::binary)); - supervised_data_stream_reader_byte reader(in); - - std::vector avg = reader.get_feature_map_average(); - std::vector > min_max = reader.get_feature_map_min_max(); - std::vector std_dev = reader.get_feature_map_std_dev(avg); - - data_scale_params scale_params(static_cast(avg.size())); - - std::cout << "Data distribution in training data:" << std::endl; - for(unsigned int i = 0; i < avg.size(); ++i) - { - std::cout << (boost::format("Feature map %1%: Avg %|2$.5f|, StdDev %|3$.5f|") % (i + 1) % avg[i] % std_dev[i]); - if (std_dev[i] > 0.0F) - { - float suggested_mult = 1.0F / std_dev[i]; - - std::cout << (boost::format(", Suggested scale %|1$.1f|") % suggested_mult).str(); - float actual_mult; - float max_mult = max_val_after_normalization / std::max(min_max[i].second - avg[i], avg[i] - min_max[i].first); - if (suggested_mult > max_mult) - { - std::cout << (boost::format(", Clipped to %|1$.1f|") % max_mult).str(); - actual_mult = max_mult; - } - else - actual_mult = suggested_mult; - std::cout << std::endl; - - scale_params.addition_list[i] = -avg[i]; - scale_params.multiplication_list[i] = actual_mult; - } - else - { - std::cout << ", Set to 0" << std::endl; - - scale_params.addition_list[i] = 0.0F; - scale_params.multiplication_list[i] = 0.0F; - } - } - - boost::filesystem::path dest_path = get_working_data_folder() / scaling_params_filename; - { - std::tr1::shared_ptr out(new boost::filesystem::ofstream(dest_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); - scale_params.write(*out); - } - std::cout << "Scale params dumped to " << dest_path << std::endl; - } - void neural_network_toolset::randomize_data() { std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / training_data_filename, std::ios_base::in | std::ios_base::binary)); std::tr1::shared_ptr out(new boost::filesystem::ofstream(get_working_data_folder() / training_randomized_data_filename, std::ios_base::out | 
std::ios_base::binary | std::ios_base::trunc)); - supervised_data_stream_reader_byte reader(in); + supervised_data_stream_reader reader(in); std::cout << "Randomizing " << reader.get_entry_count() << " entries" << std::endl; @@ -479,7 +421,7 @@ namespace nnforge schema->read(in); } - return tester_factory->create(schema, get_data_scale_params()); + return tester_factory->create(schema); } void neural_network_toolset::validate( @@ -497,7 +439,7 @@ namespace nnforge tester->set_data(data); std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / (is_validate ? validating_data_filename : testing_data_filename), std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte reader(in); + supervised_data_stream_reader reader(in); output_neuron_value_set_smart_ptr actual_neuron_value_set = reader.get_output_neuron_value_set(); @@ -522,7 +464,7 @@ namespace nnforge boost::filesystem::path batch_folder = get_working_data_folder() / batch_subfolder_name; std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / (is_validate ? 
validating_data_filename : testing_data_filename), std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte reader(in); + supervised_data_stream_reader reader(in); output_neuron_value_set_smart_ptr actual_neuron_value_set = reader.get_output_neuron_value_set(); output_neuron_class_set actual_cs(*actual_neuron_value_set); @@ -631,14 +573,14 @@ namespace nnforge tester->set_data(data); std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / validating_data_filename, std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte reader(in); + supervised_data_stream_reader reader(in); reader.reset(); tester->set_input_configuration_specific(reader.get_input_configuration()); unsigned int image_count = std::min(snapshot_count, reader.get_entry_count()); - std::vector input(reader.get_input_configuration().get_neuron_count()); + std::vector input(reader.get_input_configuration().get_neuron_count() * reader.get_input_neuron_elem_size()); for(unsigned int image_id = 0; image_id < image_count; ++image_id) { if (!reader.read(&(*input.begin()), 0)) @@ -646,7 +588,10 @@ namespace nnforge std::string snapshot_filename = (boost::format("%|1$03d|") % image_id).str(); - std::vector data_res = tester->get_snapshot(input); + std::vector data_res = tester->get_snapshot( + &(*input.begin()), + reader.get_input_type(), + reader.get_input_configuration().get_neuron_count()); save_snapshot(snapshot_filename, data_res); } @@ -691,7 +636,7 @@ namespace nnforge tester->set_data(data); std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / validating_data_filename, std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte reader(in); + supervised_data_stream_reader reader(in); output_neuron_value_set_smart_ptr actual_neuron_value_set = reader.get_output_neuron_value_set(); @@ -710,7 +655,7 @@ namespace nnforge 
tester->set_input_configuration_specific(reader.get_input_configuration()); - std::vector input(reader.get_input_configuration().get_neuron_count()); + std::vector input(reader.get_input_configuration().get_neuron_count() * reader.get_input_neuron_elem_size()); unsigned int entry_id = 0; for(std::vector >::const_iterator it = cr.predicted_and_actual_class_pair_id_list.begin(); it != cr.predicted_and_actual_class_pair_id_list.end(); @@ -729,7 +674,10 @@ namespace nnforge std::string snapshot_filename = (boost::format("actual_%|1$s|_predicted_%|2$s|_entry_%|3$03d|") % get_class_name_by_id(actual_class_id) % get_class_name_by_id(predicted_class_id) % entry_id).str(); - std::vector res = tester->get_snapshot(input); + std::vector res = tester->get_snapshot( + &(*input.begin()), + reader.get_input_type(), + reader.get_input_configuration().get_neuron_count()); save_snapshot(snapshot_filename, res); } @@ -767,8 +715,8 @@ namespace nnforge if (is_training_with_validation()) { std::tr1::shared_ptr validating_data_stream(new boost::filesystem::ifstream(get_working_data_folder() / validating_data_filename, std::ios_base::in | std::ios_base::binary)); - supervised_data_reader_byte_smart_ptr validating_data_reader(new supervised_data_stream_reader_byte(validating_data_stream)); - res.push_back(network_data_pusher_smart_ptr(new validate_progress_network_data_pusher(tester_factory->create(schema, get_data_scale_params()), validating_data_reader, get_testing_visualizer()))); + supervised_data_reader_smart_ptr validating_data_reader(new supervised_data_stream_reader(validating_data_stream)); + res.push_back(network_data_pusher_smart_ptr(new validate_progress_network_data_pusher(tester_factory->create(schema), validating_data_reader, get_testing_visualizer()))); } return res; @@ -782,9 +730,9 @@ namespace nnforge schema->read(in); } - hessian_calculator_smart_ptr hessian = hessian_factory->create(schema, get_data_scale_params()); + hessian_calculator_smart_ptr hessian = 
hessian_factory->create(schema); - network_updater_smart_ptr updater = updater_factory->create(schema, get_data_scale_params()); + network_updater_smart_ptr updater = updater_factory->create(schema); network_trainer_sdlm trainer( schema, @@ -797,7 +745,7 @@ namespace nnforge trainer.mu_increase_factor = mu_increase_factor; std::tr1::shared_ptr training_data_stream(new boost::filesystem::ifstream(get_working_data_folder() / training_randomized_data_filename, std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte training_data_reader(training_data_stream); + supervised_data_stream_reader training_data_reader(training_data_stream); std::tr1::shared_ptr peeker; boost::filesystem::path batch_folder; @@ -854,10 +802,10 @@ namespace nnforge schema->read(in); } - network_updater_smart_ptr updater = updater_factory->create(schema, get_data_scale_params()); + network_updater_smart_ptr updater = updater_factory->create(schema); std::tr1::shared_ptr training_data_stream(new boost::filesystem::ifstream(get_working_data_folder() / training_data_filename, std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte training_data_reader(training_data_stream); + supervised_data_stream_reader training_data_reader(training_data_stream); std::vector training_speed; std::vector data; @@ -910,10 +858,10 @@ namespace nnforge schema->read(in); } - hessian_calculator_smart_ptr hessian = hessian_factory->create(schema, get_data_scale_params()); + hessian_calculator_smart_ptr hessian = hessian_factory->create(schema); std::tr1::shared_ptr training_data_stream(new boost::filesystem::ifstream(get_working_data_folder() / training_data_filename, std::ios_base::in | std::ios_base::binary)); - supervised_data_stream_reader_byte training_data_reader(training_data_stream); + supervised_data_stream_reader training_data_reader(training_data_stream); network_data_smart_ptr data(new network_data(*schema)); { @@ -952,21 +900,6 @@ namespace nnforge : 
testing_complete_result_set_visualizer_smart_ptr(new testing_complete_result_set_visualizer()); } - const_data_scale_params_smart_ptr neural_network_toolset::get_data_scale_params() const - { - if (is_normalize_input()) - { - data_scale_params_smart_ptr res(new data_scale_params()); - - std::tr1::shared_ptr in(new boost::filesystem::ifstream(get_working_data_folder() / scaling_params_filename, std::ios_base::in | std::ios_base::binary)); - res->read(*in); - - return res; - } - else - return const_data_scale_params_smart_ptr(); - } - bool neural_network_toolset::is_normalize_input() const { return false; diff --git a/nnforge/neural_network_toolset.h b/nnforge/neural_network_toolset.h index bb20e84..09d0f60 100644 --- a/nnforge/neural_network_toolset.h +++ b/nnforge/neural_network_toolset.h @@ -21,7 +21,6 @@ #include "network_data_pusher.h" #include "testing_complete_result_set_visualizer.h" -#include "data_scale_params.h" #include #include @@ -63,8 +62,6 @@ namespace nnforge virtual void randomize_data(); - virtual void normalize_data(); - virtual void create(); virtual network_tester_smart_ptr get_tester(); @@ -100,8 +97,6 @@ namespace nnforge virtual bool is_training_with_validation() const; - virtual const_data_scale_params_smart_ptr get_data_scale_params() const; - virtual testing_complete_result_set_visualizer_smart_ptr get_testing_visualizer() const; static cv::Mat rotate_scale_shift( diff --git a/nnforge/neuron_data_type.cpp b/nnforge/neuron_data_type.cpp new file mode 100644 index 0000000..132eca0 --- /dev/null +++ b/nnforge/neuron_data_type.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "neuron_data_type.h" + +#include "neural_network_exception.h" +#include + +namespace nnforge +{ + size_t neuron_data_type::get_input_size(input_type t) + { + switch(t) + { + case type_byte: + return sizeof(unsigned char); + case type_float: + return sizeof(float); + } + + throw neural_network_exception((boost::format("Unknown input type %1%") % t).str()); + } +} diff --git a/nnforge/data_scale_params.h b/nnforge/neuron_data_type.h similarity index 54% rename from nnforge/data_scale_params.h rename to nnforge/neuron_data_type.h index 2708c77..28fcea5 100644 --- a/nnforge/data_scale_params.h +++ b/nnforge/neuron_data_type.h @@ -16,29 +16,25 @@ #pragma once -#include -#include -#include #include namespace nnforge { - class data_scale_params + class neuron_data_type { public: - data_scale_params(); - - data_scale_params(unsigned int feature_map_count); - - void write(std::ostream& output_stream) const; - - void read(std::istream& input_stream); - - unsigned int feature_map_count; - std::vector addition_list; - std::vector multiplication_list; + enum input_type + { + type_unknown = 0, + type_byte = 1, + type_float = 2 + }; + + static size_t get_input_size(input_type t); + + private: + neuron_data_type(); + neuron_data_type(const neuron_data_type&); + neuron_data_type& operator =(const neuron_data_type&); }; - - typedef std::tr1::shared_ptr data_scale_params_smart_ptr; - typedef std::tr1::shared_ptr const_data_scale_params_smart_ptr; } diff --git a/nnforge/plain/hessian_calculator_plain.cpp b/nnforge/plain/hessian_calculator_plain.cpp 
index e2ea9fa..b9d9366 100644 --- a/nnforge/plain/hessian_calculator_plain.cpp +++ b/nnforge/plain/hessian_calculator_plain.cpp @@ -28,9 +28,8 @@ namespace nnforge { hessian_calculator_plain::hessian_calculator_plain( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, plain_running_configuration_const_smart_ptr plain_config) - : hessian_calculator(schema, scale_params) + : hessian_calculator(schema) , plain_config(plain_config) { const const_layer_list& layer_list = *schema; @@ -59,7 +58,7 @@ namespace nnforge } network_data_smart_ptr hessian_calculator_plain::actual_get_hessian( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count) { @@ -71,10 +70,12 @@ namespace nnforge const unsigned int output_neuron_count = reader.get_output_configuration().get_neuron_count(); const unsigned int input_feature_map_count = reader.get_input_configuration().feature_map_count; const unsigned int neuron_count_per_input_feature_map = reader.get_input_configuration().get_neuron_count_per_feature_map(); + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); buffer_plain_size_configuration buffers_config; update_buffers_configuration(buffers_config); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // initial error for(std::vector::const_iterator it = data->begin(); it != data->end(); ++it) @@ -88,7 +89,7 @@ namespace nnforge unsigned int max_entry_count = std::min(plain_config->get_max_entry_count(buffers_config), hessian_entry_to_process_count); - std::vector 
input_buf(max_entry_count * input_neuron_count); + std::vector input_buf(max_entry_count * input_neuron_count * input_neuron_elem_size); additional_buffer_smart_ptr initial_error_buf(new std::vector(max_entry_count * output_neuron_count)); additional_buffer_smart_ptr input_converted_buf(new std::vector(input_neuron_count * max_entry_count)); @@ -142,7 +143,7 @@ namespace nnforge while((entries_available_for_processing_count < max_entry_count) && (entries_read_count < hessian_entry_to_process_count)) { bool entry_read = reader.read( - &(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count))), + &(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count * input_neuron_elem_size))), 0); if (!entry_read) @@ -159,26 +160,24 @@ namespace nnforge // Convert input { - const int elem_count = static_cast(const_entries_available_for_processing_count); + const int elem_count = static_cast(entries_available_for_processing_count * input_neuron_count); const std::vector::iterator input_converted_buf_it_start = input_converted_buf->begin(); - const std::vector::const_iterator input_buf_it_start = input_buf.begin(); - #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) - for(int i = 0; i < elem_count; ++i) + if (type_code == neuron_data_type::type_byte) { - std::vector::iterator input_converted_buf_it = input_converted_buf_it_start + (i * input_neuron_count); - std::vector::const_iterator input_buf_it = input_buf_it_start + (i * input_neuron_count); - for(unsigned int feature_map_id = 0; feature_map_id < input_feature_map_count; ++feature_map_id) - { - float addition = current_scale_params->addition_list[feature_map_id]; - float multiplication = current_scale_params->multiplication_list[feature_map_id]; - for(unsigned int j = 0; j < neuron_count_per_input_feature_map; ++j) - { - *input_converted_buf_it = ((static_cast(*input_buf_it) * (1.0F / 255.0F)) + addition) * 
multiplication; - input_converted_buf_it++; - input_buf_it++; - } - } + const unsigned char * const input_buf_it_start = &(*input_buf.begin()); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = static_cast(*(input_buf_it_start + i)) * (1.0F / 255.0F); } + else if (type_code == neuron_data_type::type_float) + { + const float * const input_buf_it_start = reinterpret_cast(&(*input_buf.begin())); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = *(input_buf_it_start + i); + } + else + throw neural_network_exception((boost::format("actual_get_hessian cannot handle input neurons of type %1%") % type_code).str()); } // Run ann diff --git a/nnforge/plain/hessian_calculator_plain.h b/nnforge/plain/hessian_calculator_plain.h index 3929262..7b801d9 100644 --- a/nnforge/plain/hessian_calculator_plain.h +++ b/nnforge/plain/hessian_calculator_plain.h @@ -32,7 +32,6 @@ namespace nnforge public: hessian_calculator_plain( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, plain_running_configuration_const_smart_ptr plain_config); ~hessian_calculator_plain(); @@ -40,7 +39,7 @@ namespace nnforge protected: // schema, data and reader are guaranteed to be compatible virtual network_data_smart_ptr actual_get_hessian( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, network_data_smart_ptr data, unsigned int hessian_entry_to_process_count); diff --git a/nnforge/plain/hessian_calculator_plain_factory.cpp b/nnforge/plain/hessian_calculator_plain_factory.cpp index fb55704..40bcbb9 100644 --- a/nnforge/plain/hessian_calculator_plain_factory.cpp +++ b/nnforge/plain/hessian_calculator_plain_factory.cpp @@ -31,11 +31,9 @@ namespace nnforge { } - hessian_calculator_smart_ptr 
hessian_calculator_plain_factory::create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const + hessian_calculator_smart_ptr hessian_calculator_plain_factory::create(network_schema_smart_ptr schema) const { - return hessian_calculator_smart_ptr(new hessian_calculator_plain(schema, scale_params, plain_config)); + return hessian_calculator_smart_ptr(new hessian_calculator_plain(schema, plain_config)); } } } diff --git a/nnforge/plain/hessian_calculator_plain_factory.h b/nnforge/plain/hessian_calculator_plain_factory.h index 17baf37..3673466 100644 --- a/nnforge/plain/hessian_calculator_plain_factory.h +++ b/nnforge/plain/hessian_calculator_plain_factory.h @@ -30,9 +30,7 @@ namespace nnforge virtual ~hessian_calculator_plain_factory(); - virtual hessian_calculator_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const; + virtual hessian_calculator_smart_ptr create(network_schema_smart_ptr schema) const; protected: plain_running_configuration_const_smart_ptr plain_config; diff --git a/nnforge/plain/network_tester_plain.cpp b/nnforge/plain/network_tester_plain.cpp index 1de3f2b..808402e 100644 --- a/nnforge/plain/network_tester_plain.cpp +++ b/nnforge/plain/network_tester_plain.cpp @@ -17,6 +17,9 @@ #include "network_tester_plain.h" #include "layer_tester_plain_factory.h" +#include "../neural_network_exception.h" + +#include namespace nnforge { @@ -24,9 +27,8 @@ namespace nnforge { network_tester_plain::network_tester_plain( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, plain_running_configuration_const_smart_ptr plain_config) - : network_tester(schema, scale_params) + : network_tester(schema) , plain_config(plain_config) { const const_layer_list& layer_list = *schema; @@ -39,7 +41,7 @@ namespace nnforge } void network_tester_plain::actual_test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, 
testing_complete_result_set& result) { reader.reset(); @@ -49,17 +51,19 @@ namespace nnforge const unsigned int entry_count = reader.get_entry_count(); const unsigned int input_feature_map_count = reader.get_input_configuration().feature_map_count; const unsigned int neuron_count_per_input_feature_map = reader.get_input_configuration().get_neuron_count_per_feature_map(); + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); result.mse = testing_result_smart_ptr(new testing_result(output_neuron_count)); buffer_plain_size_configuration buffers_config; update_buffers_configuration_testing(buffers_config); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input const unsigned int max_entry_count = std::min(plain_config->get_max_entry_count(buffers_config), reader.get_entry_count()); - std::vector input_buf(input_neuron_count * max_entry_count); + std::vector input_buf(input_neuron_count * max_entry_count * input_neuron_elem_size); std::vector actual_output_buf(output_neuron_count * max_entry_count); additional_buffer_smart_ptr input_converted_buf(new std::vector(input_neuron_count * max_entry_count)); std::vector& mse_buf = result.mse->cumulative_mse_list; @@ -90,7 +94,7 @@ namespace nnforge while(entries_available_for_processing_count < max_entry_count) { bool entry_read = reader.read( - &(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count))), + &(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count * input_neuron_elem_size))), &(*(actual_output_buf.begin() + (output_neuron_count * entries_available_for_processing_count)))); 
if (!entry_read) { @@ -103,30 +107,26 @@ namespace nnforge if (entries_available_for_processing_count == 0) break; - const unsigned int const_entries_available_for_processing_count = entries_available_for_processing_count; - // Convert input { - const int elem_count = static_cast(const_entries_available_for_processing_count); + const int elem_count = static_cast(entries_available_for_processing_count * input_neuron_count); const std::vector::iterator input_converted_buf_it_start = input_converted_buf->begin(); - const std::vector::const_iterator input_buf_it_start = input_buf.begin(); - #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) - for(int i = 0; i < elem_count; ++i) + if (type_code == neuron_data_type::type_byte) { - std::vector::iterator input_converted_buf_it = input_converted_buf_it_start + (i * input_neuron_count); - std::vector::const_iterator input_buf_it = input_buf_it_start + (i * input_neuron_count); - for(unsigned int feature_map_id = 0; feature_map_id < input_feature_map_count; ++feature_map_id) - { - float addition = current_scale_params->addition_list[feature_map_id]; - float multiplication = current_scale_params->multiplication_list[feature_map_id]; - for(unsigned int j = 0; j < neuron_count_per_input_feature_map; ++j) - { - *input_converted_buf_it = ((static_cast(*input_buf_it) * (1.0F / 255.0F)) + addition) * multiplication; - input_converted_buf_it++; - input_buf_it++; - } - } + const unsigned char * const input_buf_it_start = &(*input_buf.begin()); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = static_cast(*(input_buf_it_start + i)) * (1.0F / 255.0F); + } + else if (type_code == neuron_data_type::type_float) + { + const float * const input_buf_it_start = reinterpret_cast(&(*input_buf.begin())); + #pragma omp parallel for default(none) schedule(guided) 
num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = *(input_buf_it_start + i); } + else + throw neural_network_exception((boost::format("actual_run cannot handle input neurons of type %1%") % type_code).str()); } // Run ann @@ -156,6 +156,7 @@ namespace nnforge const std::vector::iterator mse_buf_it = mse_buf.begin(); const std::vector::const_iterator actual_output_buf_it = actual_output_buf.begin(); const std::vector::const_iterator output_buffer_it = output_buffer->begin(); + const int const_entries_available_for_processing_count = entries_available_for_processing_count; #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) for(int i = 0; i < total_workload; ++i) { @@ -189,7 +190,7 @@ namespace nnforge } } - output_neuron_value_set_smart_ptr network_tester_plain::actual_run(unsupervised_data_reader_byte& reader) + output_neuron_value_set_smart_ptr network_tester_plain::actual_run(unsupervised_data_reader& reader) { reader.reset(); @@ -198,17 +199,19 @@ namespace nnforge const unsigned int entry_count = reader.get_entry_count(); const unsigned int input_feature_map_count = reader.get_input_configuration().feature_map_count; const unsigned int neuron_count_per_input_feature_map = reader.get_input_configuration().get_neuron_count_per_feature_map(); + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); output_neuron_value_set_smart_ptr predicted_output_neuron_value_set(new output_neuron_value_set(entry_count, output_neuron_count)); buffer_plain_size_configuration buffers_config; update_buffers_configuration_testing(buffers_config); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input 
buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input const unsigned int max_entry_count = std::min(plain_config->get_max_entry_count(buffers_config), reader.get_entry_count()); - std::vector input_buf(input_neuron_count * max_entry_count); + std::vector input_buf(input_neuron_count * max_entry_count * input_neuron_elem_size); additional_buffer_smart_ptr input_converted_buf(new std::vector(input_neuron_count * max_entry_count)); additional_buffer_smart_ptr output_buffer = input_converted_buf; @@ -237,7 +240,7 @@ namespace nnforge unsigned int entries_available_for_processing_count = 0; while(entries_available_for_processing_count < max_entry_count) { - bool entry_read = reader.read(&(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count)))); + bool entry_read = reader.read(&(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count * input_neuron_elem_size)))); if (!entry_read) { entries_remained_for_loading = false; @@ -249,30 +252,25 @@ namespace nnforge if (entries_available_for_processing_count == 0) break; - const unsigned int const_entries_available_for_processing_count = entries_available_for_processing_count; - // Convert input { - const int elem_count = static_cast(const_entries_available_for_processing_count); + const int elem_count = static_cast(entries_available_for_processing_count * input_neuron_count); const std::vector::iterator input_converted_buf_it_start = input_converted_buf->begin(); - const std::vector::const_iterator input_buf_it_start = input_buf.begin(); - #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) - for(int i = 0; i < elem_count; ++i) + if (type_code == neuron_data_type::type_byte) { - std::vector::iterator input_converted_buf_it = input_converted_buf_it_start + (i * input_neuron_count); - std::vector::const_iterator input_buf_it = input_buf_it_start + (i * input_neuron_count); - 
for(unsigned int feature_map_id = 0; feature_map_id < input_feature_map_count; ++feature_map_id) - { - float addition = current_scale_params->addition_list[feature_map_id]; - float multiplication = current_scale_params->multiplication_list[feature_map_id]; - for(unsigned int j = 0; j < neuron_count_per_input_feature_map; ++j) - { - *input_converted_buf_it = ((static_cast(*input_buf_it) * (1.0F / 255.0F)) + addition) * multiplication; - input_converted_buf_it++; - input_buf_it++; - } - } + const unsigned char * const input_buf_it_start = &(*input_buf.begin()); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = static_cast(*(input_buf_it_start + i)) * (1.0F / 255.0F); } + else if (type_code == neuron_data_type::type_float) + { + const float * const input_buf_it_start = reinterpret_cast(&(*input_buf.begin())); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = *(input_buf_it_start + i); + } + else throw neural_network_exception((boost::format("actual_run cannot handle input neurons of type %1%") % type_code).str()); } // Run ann @@ -321,13 +319,15 @@ namespace nnforge net_data = data; } - std::vector network_tester_plain::actual_get_snapshot(std::vector& input) + std::vector network_tester_plain::actual_get_snapshot( + const void * input, + neuron_data_type::input_type type_code) { std::vector res; const unsigned int input_neuron_count = layer_config_list[0].get_neuron_count(); - const unsigned int input_feature_map_count = static_cast(current_scale_params->feature_map_count); - const unsigned int neuron_count_per_input_feature_map = static_cast(input.size() / current_scale_params->feature_map_count); + const unsigned int input_feature_map_count = layer_config_list[0].feature_map_count; + const unsigned int 
neuron_count_per_input_feature_map = layer_config_list[0].get_neuron_count_per_feature_map(); additional_buffer_smart_ptr input_converted_buf(new std::vector(input_neuron_count)); @@ -358,31 +358,34 @@ namespace nnforge { layer_configuration_specific_snapshot_smart_ptr input_elem(new layer_configuration_specific_snapshot(layer_config_list[0])); res.push_back(input_elem); - - const int elem_count = static_cast(input_feature_map_count); + const int elem_count = static_cast(input_neuron_count); const std::vector::iterator input_converted_buf_it_start = input_converted_buf->begin(); const std::vector::iterator input_elem_it_start = input_elem->data.begin(); - const std::vector::const_iterator input_buf_it_start = input.begin(); - #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) - for(int feature_map_id = 0; feature_map_id < elem_count; ++feature_map_id) + if (type_code == neuron_data_type::type_byte) { - std::vector::iterator input_converted_buf_it = input_converted_buf_it_start + (feature_map_id * neuron_count_per_input_feature_map); - std::vector::iterator input_elem_it = input_elem_it_start + (feature_map_id * neuron_count_per_input_feature_map); - std::vector::const_iterator input_buf_it = input_buf_it_start + (feature_map_id * neuron_count_per_input_feature_map); - - float addition = current_scale_params->addition_list[feature_map_id]; - float multiplication = current_scale_params->multiplication_list[feature_map_id]; - for(unsigned int j = 0; j < neuron_count_per_input_feature_map; ++j) + const unsigned char * const input_buf_it_start = static_cast(input); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) { - float val = ((static_cast(*input_buf_it) * (1.0F / 255.0F)) + addition) * multiplication; - *input_converted_buf_it = val; - *input_elem_it = val; - input_buf_it++; - input_converted_buf_it++; - input_elem_it++; + 
float val = static_cast(*(input_buf_it_start + i)) * (1.0F / 255.0F); + *(input_converted_buf_it_start + i) = val; + *(input_elem_it_start + i) = val; } } + else if (type_code == neuron_data_type::type_float) + { + const float * const input_buf_it_start = static_cast(input); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + { + float val = *(input_buf_it_start + i); + *(input_converted_buf_it_start + i) = val; + *(input_elem_it_start + i) = val; + } + } + else + throw neural_network_exception((boost::format("actual_get_snapshot cannot handle input neurons of type %1%") % type_code).str()); } // Run ann @@ -421,13 +424,15 @@ namespace nnforge return res; } - layer_configuration_specific_snapshot_smart_ptr network_tester_plain::actual_run(std::vector& input) + layer_configuration_specific_snapshot_smart_ptr network_tester_plain::actual_run( + const void * input, + neuron_data_type::input_type type_code) { layer_configuration_specific_snapshot_smart_ptr res(new layer_configuration_specific_snapshot(layer_config_list[layer_config_list.size() - 1])); const unsigned int input_neuron_count = layer_config_list[0].get_neuron_count(); - const unsigned int input_feature_map_count = current_scale_params->feature_map_count; - const unsigned int neuron_count_per_input_feature_map = static_cast(input.size() / current_scale_params->feature_map_count); + const unsigned int input_feature_map_count = layer_config_list[0].feature_map_count; + const unsigned int neuron_count_per_input_feature_map = layer_config_list[0].get_neuron_count_per_feature_map(); additional_buffer_smart_ptr input_converted_buf(new std::vector(input_neuron_count)); @@ -454,24 +459,24 @@ namespace nnforge // Convert input { - const int elem_count = static_cast(input_feature_map_count); + const int elem_count = static_cast(input_neuron_count); const std::vector::iterator input_converted_buf_it_start = 
input_converted_buf->begin(); - const std::vector::const_iterator input_buf_it_start = input.begin(); - #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) - for(int feature_map_id = 0; feature_map_id < elem_count; ++feature_map_id) + if (type_code == neuron_data_type::type_byte) { - std::vector::iterator input_converted_buf_it = input_converted_buf_it_start + (feature_map_id * neuron_count_per_input_feature_map); - std::vector::const_iterator input_buf_it = input_buf_it_start + (feature_map_id * neuron_count_per_input_feature_map); - - float addition = current_scale_params->addition_list[feature_map_id]; - float multiplication = current_scale_params->multiplication_list[feature_map_id]; - for(unsigned int j = 0; j < neuron_count_per_input_feature_map; ++j) - { - *input_converted_buf_it = ((static_cast(*input_buf_it) * (1.0F / 255.0F)) + addition) * multiplication; - input_buf_it++; - input_converted_buf_it++; - } + const unsigned char * const input_buf_it_start = static_cast(input); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = static_cast(*(input_buf_it_start + i)) * (1.0F / 255.0F); + } + else if (type_code == neuron_data_type::type_float) + { + const float * const input_buf_it_start = static_cast(input); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = *(input_buf_it_start + i); } + else + throw neural_network_exception((boost::format("actual_run cannot handle input neurons of type %1%") % type_code).str()); } // Run ann diff --git a/nnforge/plain/network_tester_plain.h b/nnforge/plain/network_tester_plain.h index 8788cfb..61de64f 100644 --- a/nnforge/plain/network_tester_plain.h +++ b/nnforge/plain/network_tester_plain.h @@ -30,7 +30,6 @@ 
namespace nnforge public: network_tester_plain( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, plain_running_configuration_const_smart_ptr plain_config); virtual ~network_tester_plain(); @@ -38,20 +37,24 @@ namespace nnforge protected: // schema, data and reader are guaranteed to be compatible virtual void actual_test( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, testing_complete_result_set& result); // schema, data and reader are guaranteed to be compatible - virtual output_neuron_value_set_smart_ptr actual_run(unsupervised_data_reader_byte& reader); + virtual output_neuron_value_set_smart_ptr actual_run(unsupervised_data_reader& reader); // The method is called when client calls set_data. The data is guaranteed to be compatible with schema virtual void actual_set_data(network_data_smart_ptr data); // The method is called when client calls get_snapshot. The data is guaranteed to be compatible with schema - virtual std::vector actual_get_snapshot(std::vector& input); + virtual std::vector actual_get_snapshot( + const void * input, + neuron_data_type::input_type type_code); // The method is called when client calls get_snapshot. The data is guaranteed to be compatible with schema - virtual layer_configuration_specific_snapshot_smart_ptr actual_run(std::vector& input); + virtual layer_configuration_specific_snapshot_smart_ptr actual_run( + const void * input, + neuron_data_type::input_type type_code); // The method is called when client calls set_input_configuration_specific and the convolution specific configuration is modified. 
// The layer_config_list is guaranteed to be compatible with schema diff --git a/nnforge/plain/network_tester_plain_factory.cpp b/nnforge/plain/network_tester_plain_factory.cpp index 7e530d4..b2a7d1b 100644 --- a/nnforge/plain/network_tester_plain_factory.cpp +++ b/nnforge/plain/network_tester_plain_factory.cpp @@ -31,11 +31,9 @@ namespace nnforge { } - network_tester_smart_ptr network_tester_plain_factory::create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const + network_tester_smart_ptr network_tester_plain_factory::create(network_schema_smart_ptr schema) const { - return network_tester_smart_ptr(new network_tester_plain(schema, scale_params, plain_config)); + return network_tester_smart_ptr(new network_tester_plain(schema, plain_config)); } } } diff --git a/nnforge/plain/network_tester_plain_factory.h b/nnforge/plain/network_tester_plain_factory.h index 53af98d..035754f 100644 --- a/nnforge/plain/network_tester_plain_factory.h +++ b/nnforge/plain/network_tester_plain_factory.h @@ -30,9 +30,7 @@ namespace nnforge virtual ~network_tester_plain_factory(); - virtual network_tester_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const; + virtual network_tester_smart_ptr create(network_schema_smart_ptr schema) const; protected: plain_running_configuration_const_smart_ptr plain_config; diff --git a/nnforge/plain/network_updater_plain.cpp b/nnforge/plain/network_updater_plain.cpp index f946c6b..944b4be 100644 --- a/nnforge/plain/network_updater_plain.cpp +++ b/nnforge/plain/network_updater_plain.cpp @@ -32,9 +32,8 @@ namespace nnforge network_updater_plain::network_updater_plain( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, plain_running_configuration_const_smart_ptr plain_config) - : network_updater(schema, scale_params) + : network_updater(schema) , plain_config(plain_config) { const const_layer_list& layer_list = *schema; @@ -63,7 
+62,7 @@ namespace nnforge } std::vector network_updater_plain::actual_update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, @@ -80,6 +79,8 @@ namespace nnforge const unsigned int output_neuron_count = reader.get_output_configuration().get_neuron_count(); const unsigned int input_feature_map_count = reader.get_input_configuration().feature_map_count; const unsigned int neuron_count_per_input_feature_map = reader.get_input_configuration().get_neuron_count_per_feature_map(); + neuron_data_type::input_type type_code = reader.get_input_type(); + size_t input_neuron_elem_size = reader.get_input_neuron_elem_size(); unsigned int updater_entry_count = static_cast(data_list.size()); @@ -91,7 +92,7 @@ namespace nnforge buffer_plain_size_configuration buffers_config; update_buffers_configuration(buffers_config, updater_entry_count); - buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input + buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // output buffers_config.add_constant_buffer(output_neuron_count * sizeof(float) * updater_entry_count); // temp_mse @@ -120,7 +121,7 @@ namespace nnforge unsigned int max_entry_count = std::min(std::min(plain_config->get_max_entry_count(buffers_config), reader.get_entry_count()), max_entry_count_in_single_batch); - std::vector input_buf(max_entry_count * input_neuron_count); + std::vector input_buf(max_entry_count * input_neuron_count * input_neuron_elem_size); std::vector actual_output_buf(max_entry_count * input_neuron_count); additional_buffer_smart_ptr initial_error_buf(new std::vector(updater_entry_count * output_neuron_count)); additional_buffer_smart_ptr 
temp_mse_buf(new std::vector(updater_entry_count * output_neuron_count, 0.0F)); @@ -179,7 +180,7 @@ namespace nnforge while(entries_available_for_processing_count < max_entry_count) { bool entry_read = reader.read( - &(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count))), + &(*(input_buf.begin() + (input_neuron_count * entries_available_for_processing_count * input_neuron_elem_size))), &(*(actual_output_buf.begin() + (output_neuron_count * entries_available_for_processing_count)))); if (!entry_read) { @@ -196,26 +197,24 @@ namespace nnforge // Convert input { - const int elem_count = static_cast(const_entries_available_for_processing_count); + const int elem_count = static_cast(entries_available_for_processing_count * input_neuron_count); const std::vector::iterator input_converted_buf_it_start = input_converted_buf->begin(); - const std::vector::const_iterator input_buf_it_start = input_buf.begin(); - #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) - for(int i = 0; i < elem_count; ++i) + if (type_code == neuron_data_type::type_byte) { - std::vector::iterator input_converted_buf_it = input_converted_buf_it_start + (i * input_neuron_count); - std::vector::const_iterator input_buf_it = input_buf_it_start + (i * input_neuron_count); - for(unsigned int feature_map_id = 0; feature_map_id < input_feature_map_count; ++feature_map_id) - { - float addition = current_scale_params->addition_list[feature_map_id]; - float multiplication = current_scale_params->multiplication_list[feature_map_id]; - for(unsigned int j = 0; j < neuron_count_per_input_feature_map; ++j) - { - *input_converted_buf_it = ((static_cast(*input_buf_it) * (1.0F / 255.0F)) + addition) * multiplication; - input_converted_buf_it++; - input_buf_it++; - } - } + const unsigned char * const input_buf_it_start = &(*input_buf.begin()); + #pragma omp parallel for default(none) schedule(guided) 
num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = static_cast(*(input_buf_it_start + i)) * (1.0F / 255.0F); + } + else if (type_code == neuron_data_type::type_float) + { + const float * const input_buf_it_start = reinterpret_cast(&(*input_buf.begin())); + #pragma omp parallel for default(none) schedule(guided) num_threads(plain_config->openmp_thread_count) + for(int i = 0; i < elem_count; ++i) + *(input_converted_buf_it_start + i) = *(input_buf_it_start + i); } + else + throw neural_network_exception((boost::format("actual_update cannot handle input neurons of type %1%") % type_code).str()); } // Run testing layers diff --git a/nnforge/plain/network_updater_plain.h b/nnforge/plain/network_updater_plain.h index b32390f..03576f4 100644 --- a/nnforge/plain/network_updater_plain.h +++ b/nnforge/plain/network_updater_plain.h @@ -32,7 +32,6 @@ namespace nnforge public: network_updater_plain( network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params, plain_running_configuration_const_smart_ptr plain_config); ~network_updater_plain(); @@ -42,7 +41,7 @@ namespace nnforge protected: // schema, data and reader are guaranteed to be compatible virtual std::vector actual_update( - supervised_data_reader_byte& reader, + supervised_data_reader& reader, const std::vector& training_speed_vector_list, std::vector& data_list, const std::map& layer_to_dropout_rate_map, diff --git a/nnforge/plain/network_updater_plain_factory.cpp b/nnforge/plain/network_updater_plain_factory.cpp index 1902a49..1f0cfa5 100644 --- a/nnforge/plain/network_updater_plain_factory.cpp +++ b/nnforge/plain/network_updater_plain_factory.cpp @@ -31,11 +31,9 @@ namespace nnforge { } - network_updater_smart_ptr network_updater_plain_factory::create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const + network_updater_smart_ptr 
network_updater_plain_factory::create(network_schema_smart_ptr schema) const { - return network_updater_smart_ptr(new network_updater_plain(schema, scale_params, plain_config)); + return network_updater_smart_ptr(new network_updater_plain(schema, plain_config)); } } } diff --git a/nnforge/plain/network_updater_plain_factory.h b/nnforge/plain/network_updater_plain_factory.h index 1e711d8..4c90a76 100644 --- a/nnforge/plain/network_updater_plain_factory.h +++ b/nnforge/plain/network_updater_plain_factory.h @@ -30,9 +30,7 @@ namespace nnforge virtual ~network_updater_plain_factory(); - virtual network_updater_smart_ptr create( - network_schema_smart_ptr schema, - const_data_scale_params_smart_ptr scale_params) const; + virtual network_updater_smart_ptr create(network_schema_smart_ptr schema) const; protected: plain_running_configuration_const_smart_ptr plain_config; diff --git a/nnforge/supervised_data_reader.cpp b/nnforge/supervised_data_reader.cpp new file mode 100644 index 0000000..3ec272d --- /dev/null +++ b/nnforge/supervised_data_reader.cpp @@ -0,0 +1,54 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "supervised_data_reader.h" + +#include + +namespace nnforge +{ + supervised_data_reader::supervised_data_reader() + { + } + + supervised_data_reader::~supervised_data_reader() + { + } + + size_t supervised_data_reader::get_input_neuron_elem_size() const + { + return neuron_data_type::get_input_size(get_input_type()); + } + + output_neuron_value_set_smart_ptr supervised_data_reader::get_output_neuron_value_set() + { + reset(); + + unsigned int entry_count = get_entry_count(); + unsigned int output_neuron_count = get_output_configuration().get_neuron_count(); + + output_neuron_value_set_smart_ptr res(new output_neuron_value_set(entry_count, output_neuron_count)); + + for(std::vector >::iterator it = res->neuron_value_list.begin(); it != res->neuron_value_list.end(); it++) + { + std::vector& output_neurons = *it; + + read(0, &(*output_neurons.begin())); + } + + return res; + } +} diff --git a/nnforge/supervised_data_reader.h b/nnforge/supervised_data_reader.h index 66b999b..4bcf2bf 100644 --- a/nnforge/supervised_data_reader.h +++ b/nnforge/supervised_data_reader.h @@ -18,26 +18,19 @@ #include "layer_configuration_specific.h" #include "output_neuron_value_set.h" - -#include -#include -#include -#include +#include "neuron_data_type.h" namespace nnforge { - template class supervised_data_reader + class supervised_data_reader { public: - - virtual ~supervised_data_reader() - { - } + virtual ~supervised_data_reader(); // The method should return true in case entry is read and false if there is no more entries available (and no entry is read in this case) // If any parameter is null the method should just discard corresponding data virtual bool read( - input_data_type * input_elems, + void * input_elems, float * output_elems) = 0; virtual void reset() = 0; @@ -48,130 +41,19 @@ namespace nnforge virtual unsigned int get_entry_count() const = 0; - std::vector get_feature_map_average() - { - reset(); - - unsigned int feature_map_count = 
get_input_configuration().feature_map_count; - unsigned int neuron_count_per_feature_map = get_input_configuration().get_neuron_count_per_feature_map(); - std::vector res(feature_map_count, 0.0F); - std::vector inp(get_input_configuration().get_neuron_count()); - while (read(&(*inp.begin()), 0)) - { - for(unsigned int feature_map_id = 0; feature_map_id < feature_map_count; ++feature_map_id) - { - float sum_local = 0.0F; - for(typename std::vector::const_iterator it = inp.begin() + feature_map_id * neuron_count_per_feature_map; - it != inp.begin() + (feature_map_id + 1) * neuron_count_per_feature_map; - ++it) - { - sum_local += (static_cast(*it) * (1.0F / 255.0F)); - } - res[feature_map_id] += sum_local; - } - } - - float mult = 1.0F / static_cast(get_entry_count() * neuron_count_per_feature_map); - for(std::vector::iterator it = res.begin(); it != res.end(); ++it) - *it *= mult; - - return res; - } - - std::vector > get_feature_map_min_max() - { - reset(); - - unsigned int feature_map_count = get_input_configuration().feature_map_count; - unsigned int neuron_count_per_feature_map = get_input_configuration().get_neuron_count_per_feature_map(); - std::vector > res(feature_map_count, std::make_pair(1.0e37F, -1.0e37F)); - std::vector inp(get_input_configuration().get_neuron_count()); - while (read(&(*inp.begin()), 0)) - { - for(unsigned int feature_map_id = 0; feature_map_id < feature_map_count; ++feature_map_id) - { - float min_local = 1.0e37F; - float max_local = -1.0e37F; - for(typename std::vector::const_iterator it = inp.begin() + feature_map_id * neuron_count_per_feature_map; - it != inp.begin() + (feature_map_id + 1) * neuron_count_per_feature_map; - ++it) - { - float val = static_cast(*it) * (1.0F / 255.0F); - min_local = std::min(min_local, val); - max_local = std::max(max_local, val); - } - res[feature_map_id].first = std::min(res[feature_map_id].first, min_local); - res[feature_map_id].second = std::max(res[feature_map_id].second, max_local); - } - } - - 
return res; - } - - std::vector get_feature_map_std_dev(const std::vector& avg) - { - reset(); + virtual neuron_data_type::input_type get_input_type() const = 0; - unsigned int feature_map_count = get_input_configuration().feature_map_count; - unsigned int neuron_count_per_feature_map = get_input_configuration().get_neuron_count_per_feature_map(); - std::vector res(feature_map_count, 0.0F); - std::vector inp(get_input_configuration().get_neuron_count()); - while (read(&(*inp.begin()), 0)) - { - for(unsigned int feature_map_id = 0; feature_map_id < feature_map_count; ++feature_map_id) - { - float sum_local = 0.0F; - float current_avg = avg[feature_map_id]; - for(typename std::vector::const_iterator it = inp.begin() + feature_map_id * neuron_count_per_feature_map; - it != inp.begin() + (feature_map_id + 1) * neuron_count_per_feature_map; - ++it) - { - float val = (static_cast(*it) * (1.0F / 255.0F)); - float diff = val - current_avg; - sum_local += diff * diff; - } - res[feature_map_id] += sum_local; - } - } + size_t get_input_neuron_elem_size() const; - float mult = 1.0F / static_cast(get_entry_count() * neuron_count_per_feature_map); - for(std::vector::iterator it = res.begin(); it != res.end(); ++it) - *it = sqrtf(*it * mult); - - return res; - } - - output_neuron_value_set_smart_ptr get_output_neuron_value_set() - { - reset(); - - unsigned int entry_count = get_entry_count(); - unsigned int output_neuron_count = get_output_configuration().get_neuron_count(); - - output_neuron_value_set_smart_ptr res(new output_neuron_value_set(entry_count, output_neuron_count)); - - for(std::vector >::iterator it = res->neuron_value_list.begin(); it != res->neuron_value_list.end(); it++) - { - std::vector& output_neurons = *it; - - read(0, &(*output_neurons.begin())); - } - - return res; - } + output_neuron_value_set_smart_ptr get_output_neuron_value_set(); protected: - supervised_data_reader() - { - } + supervised_data_reader(); private: supervised_data_reader(const 
supervised_data_reader&); supervised_data_reader& operator =(const supervised_data_reader&); }; - typedef supervised_data_reader supervised_data_reader_byte; - typedef supervised_data_reader supervised_data_reader_float; - - typedef std::tr1::shared_ptr supervised_data_reader_byte_smart_ptr; + typedef std::tr1::shared_ptr supervised_data_reader_smart_ptr; } diff --git a/nnforge/supervised_data_stream_reader.cpp b/nnforge/supervised_data_stream_reader.cpp index 8819c2f..64f7dad 100644 --- a/nnforge/supervised_data_stream_reader.cpp +++ b/nnforge/supervised_data_stream_reader.cpp @@ -23,11 +23,9 @@ namespace nnforge { - supervised_data_stream_reader_base::supervised_data_stream_reader_base( - std::tr1::shared_ptr input_stream, - size_t input_elem_size, - unsigned int type_code) - : in_stream(input_stream), input_elem_size(input_elem_size), entry_read_count(0) + supervised_data_stream_reader::supervised_data_stream_reader(std::tr1::shared_ptr input_stream) + : in_stream(input_stream) + , entry_read_count(0) { in_stream->exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit); @@ -44,48 +42,160 @@ namespace nnforge unsigned int type_code_read; in_stream->read(reinterpret_cast(&type_code_read), sizeof(type_code_read)); - if (type_code_read != type_code) - throw neural_network_exception((boost::format("Unexpected type code encountered in input stream: %1%") % type_code_read).str()); + type_code = static_cast(type_code_read); in_stream->read(reinterpret_cast(&entry_count), sizeof(entry_count)); reset_pos = in_stream->tellg(); } - supervised_data_stream_reader_base::~supervised_data_stream_reader_base() + supervised_data_stream_reader::~supervised_data_stream_reader() { } - void supervised_data_stream_reader_base::reset() + void supervised_data_stream_reader::reset() { in_stream->seekg(reset_pos); entry_read_count = 0; } - void supervised_data_stream_reader_base::rewind(unsigned int entry_id) + bool supervised_data_stream_reader::read( + void * 
input_neurons, + float * output_neurons) { - in_stream->seekg(reset_pos); - in_stream->seekg((std::istream::off_type)entry_id * (std::istream::off_type)((input_elem_size * input_neuron_count) + (sizeof(float) * output_neuron_count)), std::ios::cur); + if (!entry_available()) + return false; - entry_read_count = entry_id; - } + if (input_neurons) + in_stream->read(reinterpret_cast(input_neurons), get_input_neuron_elem_size() * input_neuron_count); + else + in_stream->seekg(get_input_neuron_elem_size() * input_neuron_count, std::ios_base::cur); - void supervised_data_stream_reader_base::read_output(float * output_neurons) - { if (output_neurons) in_stream->read(reinterpret_cast(output_neurons), sizeof(*output_neurons) * output_neuron_count); else in_stream->seekg(sizeof(*output_neurons) * output_neuron_count, std::ios_base::cur); entry_read_count++; + + return true; } - bool supervised_data_stream_reader_base::entry_available() + bool supervised_data_stream_reader::entry_available() { return (entry_read_count < entry_count); } + void supervised_data_stream_reader::rewind(unsigned int entry_id) + { + in_stream->seekg(reset_pos); + in_stream->seekg((std::istream::off_type)entry_id * (std::istream::off_type)((get_input_neuron_elem_size() * input_neuron_count) + (sizeof(float) * output_neuron_count)), std::ios::cur); + + entry_read_count = entry_id; + } + + void supervised_data_stream_reader::write_randomized(std::tr1::shared_ptr output_stream) + { + supervised_data_stream_writer sw( + output_stream, + input_configuration, + output_configuration); + + if (entry_count == 0) + return; + + random_generator rnd = rnd::get_random_generator(); + + std::vector entry_to_write_list(entry_count); + for(unsigned int i = 0; i < entry_count; ++i) + { + entry_to_write_list[i] = i; + } + + std::vector in(input_neuron_count * get_input_neuron_elem_size()); + std::vector out(output_neuron_count); + + for(unsigned int entry_to_write_count = entry_count; entry_to_write_count > 0; 
--entry_to_write_count) + { + std::tr1::uniform_int dist(0, entry_to_write_count - 1); + + unsigned int index = dist(rnd); + unsigned int entry_id = entry_to_write_list[index]; + + rewind(entry_id); + read(&(*in.begin()), &(*out.begin())); + sw.write(type_code, (const void *)(&(*in.begin())), &(*out.begin())); + + unsigned int leftover_entry_id = entry_to_write_list[entry_to_write_count - 1]; + entry_to_write_list[index] = leftover_entry_id; + } + } + + void supervised_data_stream_reader::write_randomized_classifier(std::tr1::shared_ptr output_stream) + { + supervised_data_stream_writer sw( + output_stream, + input_configuration, + output_configuration); + + if (entry_count == 0) + return; + + random_generator rnd = rnd::get_random_generator(); + + std::vector class_buckets_entry_id_lists; + fill_class_buckets_entry_id_lists(class_buckets_entry_id_lists); + + std::vector in(input_neuron_count * get_input_neuron_elem_size()); + std::vector out(output_neuron_count); + + for(unsigned int entry_to_write_count = entry_count; entry_to_write_count > 0; --entry_to_write_count) + { + std::vector::iterator bucket_it = class_buckets_entry_id_lists.begin(); + float best_ratio = 0.0F; + for(std::vector::iterator it = class_buckets_entry_id_lists.begin(); it != class_buckets_entry_id_lists.end(); ++it) + { + float new_ratio = it->get_ratio(); + if (new_ratio > best_ratio) + { + bucket_it = it; + best_ratio = new_ratio; + } + } + + if (bucket_it->is_empty()) + throw neural_network_exception("Unexpected error in write_randomized_classifier: No elements left"); + + unsigned int entry_id = bucket_it->peek_random(rnd); + + rewind(entry_id); + read(&(*in.begin()), &(*out.begin())); + sw.write(type_code, (const void *)(&(*in.begin())), &(*out.begin())); + } + } + + void supervised_data_stream_reader::fill_class_buckets_entry_id_lists(std::vector& class_buckets_entry_id_lists) + { + class_buckets_entry_id_lists.resize(output_neuron_count + 1); + + std::vector 
output(output_neuron_count); + + unsigned int entry_id = 0; + while (read(0, &(*output.begin()))) + { + float min_value = *std::min_element(output.begin(), output.end()); + std::vector::iterator max_elem = std::max_element(output.begin(), output.end()); + + if ((min_value < *max_elem) || (*max_elem > 0.0F)) + class_buckets_entry_id_lists[max_elem - output.begin()].push(entry_id); + else + class_buckets_entry_id_lists[output.size()].push(entry_id); + + ++entry_id; + } + } + randomized_classifier_keeper::randomized_classifier_keeper() : pushed_count(0) , remaining_ratio(0.0F) diff --git a/nnforge/supervised_data_stream_reader.h b/nnforge/supervised_data_stream_reader.h index f0a9a1e..ab676b9 100644 --- a/nnforge/supervised_data_stream_reader.h +++ b/nnforge/supervised_data_stream_reader.h @@ -19,54 +19,17 @@ #include "supervised_data_reader.h" #include "supervised_data_stream_schema.h" #include "supervised_data_stream_writer.h" -#include "rnd.h" #include "neural_network_exception.h" +#include "rnd.h" +#include "neuron_data_type.h" #include #include #include #include -#include namespace nnforge { - class supervised_data_stream_reader_base - { - public: - void rewind(unsigned int entry_id); - - protected: - supervised_data_stream_reader_base( - std::tr1::shared_ptr input_stream, - size_t input_elem_size, - unsigned int type_code); - - ~supervised_data_stream_reader_base(); - - void read_output(float * output_neurons); - - bool entry_available(); - - void reset(); - - std::tr1::shared_ptr in_stream; - - unsigned int input_neuron_count; - unsigned int output_neuron_count; - layer_configuration_specific input_configuration; - layer_configuration_specific output_configuration; - unsigned int entry_count; - - private: - unsigned int entry_read_count; - - std::istream::pos_type reset_pos; - - size_t input_elem_size; - - supervised_data_stream_reader_base(); - }; - class randomized_classifier_keeper { public: @@ -88,120 +51,19 @@ namespace nnforge void update_ratio(); }; - 
template class supervised_data_stream_reader : public supervised_data_reader, public supervised_data_stream_reader_base + class supervised_data_stream_reader : public supervised_data_reader { public: - // The constructor modifies output_stream to throw exceptions in case of failure - supervised_data_stream_reader(std::tr1::shared_ptr input_stream) - : supervised_data_stream_reader_base(input_stream, sizeof(input_data_type), data_type_code) - { - } - - virtual ~supervised_data_stream_reader() - { - } - - virtual bool read( - input_data_type * input_neurons, - float * output_neurons) - { - if (!entry_available()) - return false; + // The constructor modifies input_stream to throw exceptions in case of failure + supervised_data_stream_reader(std::tr1::shared_ptr input_stream); - if (input_neurons) - in_stream->read(reinterpret_cast(input_neurons), sizeof(*input_neurons) * input_neuron_count); - else - in_stream->seekg(sizeof(*input_neurons) * input_neuron_count, std::ios_base::cur); + virtual ~supervised_data_stream_reader(); - supervised_data_stream_reader_base::read_output(output_neurons); + virtual void reset(); - return true; - } - - void write_randomized(std::tr1::shared_ptr output_stream) - { - supervised_data_stream_writer sw( - output_stream, - input_configuration, - output_configuration); - - if (entry_count == 0) - return; - - random_generator rnd = rnd::get_random_generator(); - - std::vector entry_to_write_list(entry_count); - for(unsigned int i = 0; i < entry_count; ++i) - { - entry_to_write_list[i] = i; - } - - std::vector in(input_neuron_count); - std::vector out(output_neuron_count); - - for(unsigned int entry_to_write_count = entry_count; entry_to_write_count > 0; --entry_to_write_count) - { - std::tr1::uniform_int dist(0, entry_to_write_count - 1); - - unsigned int index = dist(rnd); - unsigned int entry_id = entry_to_write_list[index]; - - rewind(entry_id); - read(&(*in.begin()), &(*out.begin())); - sw.write(&(*in.begin()), &(*out.begin())); - - 
unsigned int leftover_entry_id = entry_to_write_list[entry_to_write_count - 1]; - entry_to_write_list[index] = leftover_entry_id; - } - } - - void write_randomized_classifier(std::tr1::shared_ptr output_stream) - { - supervised_data_stream_writer sw( - output_stream, - input_configuration, - output_configuration); - - if (entry_count == 0) - return; - - random_generator rnd = rnd::get_random_generator(); - - std::vector class_buckets_entry_id_lists; - fill_class_buckets_entry_id_lists(class_buckets_entry_id_lists); - - std::vector in(input_neuron_count); - std::vector out(output_neuron_count); - - for(unsigned int entry_to_write_count = entry_count; entry_to_write_count > 0; --entry_to_write_count) - { - std::vector::iterator bucket_it = class_buckets_entry_id_lists.begin(); - float best_ratio = 0.0F; - for(std::vector::iterator it = class_buckets_entry_id_lists.begin(); it != class_buckets_entry_id_lists.end(); ++it) - { - float new_ratio = it->get_ratio(); - if (new_ratio > best_ratio) - { - bucket_it = it; - best_ratio = new_ratio; - } - } - - if (bucket_it->is_empty()) - throw neural_network_exception("Unexpected error in write_randomized_classifier: No elements left"); - - unsigned int entry_id = bucket_it->peek_random(rnd); - - rewind(entry_id); - read(&(*in.begin()), &(*out.begin())); - sw.write(&(*in.begin()), &(*out.begin())); - } - } - - virtual void reset() - { - supervised_data_stream_reader_base::reset(); - } + virtual bool read( + void * input_neurons, + float * output_neurons); virtual layer_configuration_specific get_input_configuration() const { @@ -218,33 +80,38 @@ namespace nnforge return entry_count; } - protected: - void fill_class_buckets_entry_id_lists(std::vector& class_buckets_entry_id_lists) + virtual neuron_data_type::input_type get_input_type() const { - class_buckets_entry_id_lists.resize(output_neuron_count + 1); + return type_code; + } - std::vector output(output_neuron_count); + void write_randomized(std::tr1::shared_ptr 
output_stream); - unsigned int entry_id = 0; - while (read(0, &(*output.begin()))) - { - float min_value = *std::min_element(output.begin(), output.end()); - std::vector::iterator max_elem = std::max_element(output.begin(), output.end()); + void write_randomized_classifier(std::tr1::shared_ptr output_stream); - if ((min_value < *max_elem) || (*max_elem > 0.0F)) - class_buckets_entry_id_lists[max_elem - output.begin()].push(entry_id); - else - class_buckets_entry_id_lists[output.size()].push(entry_id); + protected: + bool entry_available(); - ++entry_id; - } - } + void rewind(unsigned int entry_id); + + void fill_class_buckets_entry_id_lists(std::vector& class_buckets_entry_id_lists); + + protected: + std::tr1::shared_ptr in_stream; + unsigned int input_neuron_count; + unsigned int output_neuron_count; + layer_configuration_specific input_configuration; + layer_configuration_specific output_configuration; + neuron_data_type::input_type type_code; + unsigned int entry_count; + + unsigned int entry_read_count; + std::istream::pos_type reset_pos; private: supervised_data_stream_reader(const supervised_data_stream_reader&); supervised_data_stream_reader& operator =(const supervised_data_stream_reader&); }; - typedef supervised_data_stream_reader supervised_data_stream_reader_byte; - typedef supervised_data_stream_reader supervised_data_stream_reader_float; + typedef std::tr1::shared_ptr supervised_data_stream_reader_smart_ptr; } diff --git a/nnforge/supervised_data_stream_schema.h b/nnforge/supervised_data_stream_schema.h index 5d6e832..154fa6a 100644 --- a/nnforge/supervised_data_stream_schema.h +++ b/nnforge/supervised_data_stream_schema.h @@ -22,16 +22,12 @@ namespace nnforge { class supervised_data_stream_schema { - private: - supervised_data_stream_schema(); - public: static const boost::uuids::uuid supervised_data_stream_guid; - enum input_type - { - type_char = 1, - type_float = 2 - }; + private: + supervised_data_stream_schema(); + 
supervised_data_stream_schema(const supervised_data_stream_schema&); + supervised_data_stream_schema& operator =(const supervised_data_stream_schema&); }; } diff --git a/nnforge/supervised_data_stream_writer.cpp b/nnforge/supervised_data_stream_writer.cpp index 493be30..53b0f74 100644 --- a/nnforge/supervised_data_stream_writer.cpp +++ b/nnforge/supervised_data_stream_writer.cpp @@ -16,16 +16,19 @@ #include "supervised_data_stream_writer.h" +#include "neural_network_exception.h" + +#include + namespace nnforge { - supervised_data_stream_writer_base::supervised_data_stream_writer_base( + supervised_data_stream_writer::supervised_data_stream_writer( std::tr1::shared_ptr output_stream, const layer_configuration_specific& input_configuration, - const layer_configuration_specific& output_configuration, - unsigned int type_code) - : out_stream(output_stream), entry_count(0) + const layer_configuration_specific& output_configuration) + : out_stream(output_stream), entry_count(0), type_code(neuron_data_type::type_unknown) { - out_stream->exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit); + out_stream->exceptions(std::ostream::failbit | std::ostream::badbit); input_neuron_count = input_configuration.get_neuron_count(); output_neuron_count = output_configuration.get_neuron_count(); @@ -36,28 +39,82 @@ namespace nnforge output_configuration.write(*out_stream); + type_code_pos = out_stream->tellp(); out_stream->write(reinterpret_cast(&type_code), sizeof(type_code)); entry_count_pos = out_stream->tellp(); - out_stream->write(reinterpret_cast(&entry_count), sizeof(entry_count)); } - supervised_data_stream_writer_base::~supervised_data_stream_writer_base() + supervised_data_stream_writer::~supervised_data_stream_writer() { - // write entry count std::ostream::pos_type current_pos = out_stream->tellp(); + + // write type code + out_stream->seekp(type_code_pos); + if (type_code == neuron_data_type::type_unknown) + type_code = 
neuron_data_type::type_byte; + unsigned int t = static_cast(type_code); + out_stream->write(reinterpret_cast(&t), sizeof(t)); + + // write entry count out_stream->seekp(entry_count_pos); out_stream->write(reinterpret_cast(&entry_count), sizeof(entry_count)); + out_stream->seekp(current_pos); out_stream->flush(); } - void supervised_data_stream_writer_base::write_output(const float * output_neurons) + void supervised_data_stream_writer::write( + neuron_data_type::input_type type_code, + const void * input_neurons, + const float * output_neurons) + { + if (this->type_code == neuron_data_type::type_unknown) + { + this->type_code = type_code; + input_elem_size = neuron_data_type::get_input_size(this->type_code); + } + else if (this->type_code != type_code) + throw neural_network_exception((boost::format("Cannot write elements with different input type: %1% %2%") % this->type_code % type_code).str()); + + out_stream->write(reinterpret_cast(input_neurons), input_elem_size * input_neuron_count); + out_stream->write(reinterpret_cast(output_neurons), sizeof(*output_neurons) * output_neuron_count); + entry_count++; + } + + void supervised_data_stream_writer::write( + const unsigned char * input_neurons, + const float * output_neurons) { + if (type_code == neuron_data_type::type_unknown) + { + type_code = neuron_data_type::type_byte; + input_elem_size = neuron_data_type::get_input_size(type_code); + } + else if (type_code != neuron_data_type::type_byte) + throw neural_network_exception((boost::format("Cannot write elements with different input type: %1% %2%") % type_code % neuron_data_type::type_byte).str()); + + out_stream->write(reinterpret_cast(input_neurons), input_elem_size * input_neuron_count); out_stream->write(reinterpret_cast(output_neurons), sizeof(*output_neurons) * output_neuron_count); + entry_count++; + } + void supervised_data_stream_writer::write( + const float * input_neurons, + const float * output_neurons) + { + if (type_code == 
neuron_data_type::type_unknown) + { + type_code = neuron_data_type::type_float; + input_elem_size = neuron_data_type::get_input_size(type_code); + } + else if (type_code != neuron_data_type::type_float) + throw neural_network_exception((boost::format("Cannot write elements with different input type: %1% %2%") % type_code % neuron_data_type::type_float).str()); + + out_stream->write(reinterpret_cast(input_neurons), input_elem_size * input_neuron_count); + out_stream->write(reinterpret_cast(output_neurons), sizeof(*output_neurons) * output_neuron_count); entry_count++; } } diff --git a/nnforge/supervised_data_stream_writer.h b/nnforge/supervised_data_stream_writer.h index c22f5a5..bb4a249 100644 --- a/nnforge/supervised_data_stream_writer.h +++ b/nnforge/supervised_data_stream_writer.h @@ -18,69 +18,55 @@ #include "supervised_data_stream_schema.h" #include "layer_configuration_specific.h" +#include "neuron_data_type.h" #include #include #include -#include namespace nnforge { - class supervised_data_stream_writer_base + class supervised_data_stream_writer { - protected: + public: + // The constructor modifies output_stream to throw exceptions in case of failure // The stream should be created with std::ios_base::binary flag - supervised_data_stream_writer_base( + supervised_data_stream_writer( std::tr1::shared_ptr output_stream, const layer_configuration_specific& input_configuration, - const layer_configuration_specific& output_configuration, - unsigned int type_code); + const layer_configuration_specific& output_configuration); - virtual ~supervised_data_stream_writer_base(); + virtual ~supervised_data_stream_writer(); - void write_output(const float * output_elems); + void write( + neuron_data_type::input_type type_code, + const void * input_neurons, + const float * output_neurons); + + void write( + const float * input_neurons, + const float * output_neurons); + void write( + const unsigned char * input_neurons, + const float * output_neurons); + + private: 
std::tr1::shared_ptr out_stream; unsigned int input_neuron_count; unsigned int output_neuron_count; - private: - supervised_data_stream_writer_base(); + std::ostream::pos_type type_code_pos; + neuron_data_type::input_type type_code; + size_t input_elem_size; std::ostream::pos_type entry_count_pos; unsigned int entry_count; - }; - - template class supervised_data_stream_writer : public supervised_data_stream_writer_base - { - public: - // The constructor modifies output_stream to throw exceptions in case of failure - supervised_data_stream_writer( - std::tr1::shared_ptr output_stream, - const layer_configuration_specific& input_configuration, - const layer_configuration_specific& output_configuration) - : supervised_data_stream_writer_base(output_stream, input_configuration, output_configuration, data_type_code) - { - } - - virtual ~supervised_data_stream_writer() - { - } - - void write( - const input_data_type * input_neurons, - const float * output_neurons) - { - out_stream->write(reinterpret_cast(input_neurons), sizeof(*input_neurons) * input_neuron_count); - - supervised_data_stream_writer_base::write_output(output_neurons); - } private: supervised_data_stream_writer(const supervised_data_stream_writer&); supervised_data_stream_writer& operator =(const supervised_data_stream_writer&); }; - typedef supervised_data_stream_writer supervised_data_stream_writer_byte; - typedef supervised_data_stream_writer supervised_data_stream_writer_float; + typedef std::tr1::shared_ptr supervised_data_stream_writer_smart_ptr; } diff --git a/nnforge/unsupervised_data_reader.cpp b/nnforge/unsupervised_data_reader.cpp new file mode 100644 index 0000000..c336c89 --- /dev/null +++ b/nnforge/unsupervised_data_reader.cpp @@ -0,0 +1,35 @@ +/* + * Copyright 2011-2013 Maxim Milakov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "unsupervised_data_reader.h" + +#include + +namespace nnforge +{ + unsupervised_data_reader::unsupervised_data_reader() + { + } + + unsupervised_data_reader::~unsupervised_data_reader() + { + } + + size_t unsupervised_data_reader::get_input_neuron_elem_size() const + { + return neuron_data_type::get_input_size(get_input_type()); + } +} diff --git a/nnforge/unsupervised_data_reader.h b/nnforge/unsupervised_data_reader.h index 60c1f5e..f72949a 100644 --- a/nnforge/unsupervised_data_reader.h +++ b/nnforge/unsupervised_data_reader.h @@ -18,22 +18,18 @@ #include "layer_configuration_specific.h" #include "output_neuron_value_set.h" - -#include +#include "neuron_data_type.h" namespace nnforge { - template class unsupervised_data_reader + class unsupervised_data_reader { public: - - virtual ~unsupervised_data_reader() - { - } + virtual ~unsupervised_data_reader(); // The method should return true in case entry is read and false if there is no more entries available (and no entry is read in this case) // If any parameter is null the method should just discard corresponding data - virtual bool read(input_data_type * input_elems) = 0; + virtual bool read(void * input_elems) = 0; virtual void reset() = 0; @@ -41,18 +37,17 @@ namespace nnforge virtual unsigned int get_entry_count() const = 0; + virtual neuron_data_type::input_type get_input_type() const = 0; + + size_t get_input_neuron_elem_size() const; + protected: - unsupervised_data_reader() - { - } + unsupervised_data_reader(); private: unsupervised_data_reader(const 
unsupervised_data_reader&); unsupervised_data_reader& operator =(const unsupervised_data_reader&); }; - typedef unsupervised_data_reader unsupervised_data_reader_byte; - typedef unsupervised_data_reader unsupervised_data_reader_float; - - typedef std::tr1::shared_ptr unsupervised_data_reader_byte_smart_ptr; + typedef std::tr1::shared_ptr unsupervised_data_reader_smart_ptr; } diff --git a/nnforge/unsupervised_data_stream_reader.cpp b/nnforge/unsupervised_data_stream_reader.cpp index 7e2417c..b1576e0 100644 --- a/nnforge/unsupervised_data_stream_reader.cpp +++ b/nnforge/unsupervised_data_stream_reader.cpp @@ -23,11 +23,9 @@ namespace nnforge { - unsupervised_data_stream_reader_base::unsupervised_data_stream_reader_base( - std::tr1::shared_ptr input_stream, - size_t input_elem_size, - unsigned int type_code) - : in_stream(input_stream), input_elem_size(input_elem_size), entry_read_count(0) + unsupervised_data_stream_reader::unsupervised_data_stream_reader(std::tr1::shared_ptr input_stream) + : in_stream(input_stream) + , entry_read_count(0) { in_stream->exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit); @@ -42,32 +40,42 @@ namespace nnforge unsigned int type_code_read; in_stream->read(reinterpret_cast(&type_code_read), sizeof(type_code_read)); - if (type_code_read != type_code) - throw neural_network_exception((boost::format("Unexpected type code encountered in input stream: %1%") % type_code_read).str()); + type_code = static_cast(type_code_read); in_stream->read(reinterpret_cast(&entry_count), sizeof(entry_count)); reset_pos = in_stream->tellg(); } - unsupervised_data_stream_reader_base::~unsupervised_data_stream_reader_base() + unsupervised_data_stream_reader::~unsupervised_data_stream_reader() { } - void unsupervised_data_stream_reader_base::reset() + void unsupervised_data_stream_reader::reset() { in_stream->seekg(reset_pos); entry_read_count = 0; } - bool unsupervised_data_stream_reader_base::entry_available() + bool 
unsupervised_data_stream_reader::read(void * input_neurons) { - return (entry_read_count < entry_count); + if (!entry_available()) + return false; + + if (input_neurons) + in_stream->read(reinterpret_cast(input_neurons), get_input_neuron_elem_size() * input_neuron_count); + else + in_stream->seekg(get_input_neuron_elem_size() * input_neuron_count, std::ios_base::cur); + + entry_read_count++; + + return true; } - void unsupervised_data_stream_reader_base::notify_read() + bool unsupervised_data_stream_reader::entry_available() { - entry_read_count++; + return (entry_read_count < entry_count); } + } diff --git a/nnforge/unsupervised_data_stream_reader.h b/nnforge/unsupervised_data_stream_reader.h index 42d4937..f8e9db2 100644 --- a/nnforge/unsupervised_data_stream_reader.h +++ b/nnforge/unsupervised_data_stream_reader.h @@ -18,95 +18,60 @@ #include "unsupervised_data_reader.h" #include "unsupervised_data_stream_schema.h" -#include "rnd.h" #include "neural_network_exception.h" +#include "neuron_data_type.h" #include +#include #include +#include namespace nnforge { - class unsupervised_data_stream_reader_base + class unsupervised_data_stream_reader : public unsupervised_data_reader { - protected: - unsupervised_data_stream_reader_base( - std::tr1::shared_ptr input_stream, - size_t input_elem_size, - unsigned int type_code); - - ~unsupervised_data_stream_reader_base(); - - bool entry_available(); - - void reset(); - - std::tr1::shared_ptr in_stream; - - unsigned int input_neuron_count; - layer_configuration_specific input_configuration; - unsigned int entry_count; - - protected: - void notify_read(); - - private: - unsigned int entry_read_count; + public: + // The constructor modifies input_stream to throw exceptions in case of failure + unsupervised_data_stream_reader(std::tr1::shared_ptr input_stream); - std::istream::pos_type reset_pos; + virtual ~unsupervised_data_stream_reader(); - size_t input_elem_size; + virtual void reset(); - 
unsupervised_data_stream_reader_base(); - }; + virtual bool read(void * input_neurons); - template class unsupervised_data_stream_reader : public unsupervised_data_reader, public unsupervised_data_stream_reader_base - { - public: - // The constructor modifies output_stream to throw exceptions in case of failure - unsupervised_data_stream_reader(std::tr1::shared_ptr input_stream) - : unsupervised_data_stream_reader_base(input_stream, sizeof(input_data_type), data_type_code) + virtual layer_configuration_specific get_input_configuration() const { + return input_configuration; } - virtual ~unsupervised_data_stream_reader() + virtual unsigned int get_entry_count() const { + return entry_count; } - virtual bool read(input_data_type * input_neurons) + virtual neuron_data_type::input_type get_input_type() const { - if (!entry_available()) - return false; - - if (input_neurons) - in_stream->read(reinterpret_cast(input_neurons), sizeof(*input_neurons) * input_neuron_count); - else - in_stream->seekg(sizeof(*input_neurons) * input_neuron_count, std::ios_base::cur); - - unsupervised_data_stream_reader_base::notify_read(); - - return true; + return type_code; } - virtual void reset() - { - unsupervised_data_stream_reader_base::reset(); - } + protected: + bool entry_available(); - virtual layer_configuration_specific get_input_configuration() const - { - return input_configuration; - } + protected: + std::tr1::shared_ptr in_stream; + unsigned int input_neuron_count; + layer_configuration_specific input_configuration; + neuron_data_type::input_type type_code; + unsigned int entry_count; - virtual unsigned int get_entry_count() const - { - return entry_count; - } + unsigned int entry_read_count; + std::istream::pos_type reset_pos; private: unsupervised_data_stream_reader(const unsupervised_data_stream_reader&); unsupervised_data_stream_reader& operator =(const unsupervised_data_stream_reader&); }; - typedef unsupervised_data_stream_reader unsupervised_data_stream_reader_byte; - 
typedef unsupervised_data_stream_reader unsupervised_data_stream_reader_float; + typedef std::tr1::shared_ptr unsupervised_data_stream_reader_smart_ptr; } diff --git a/nnforge/unsupervised_data_stream_schema.h b/nnforge/unsupervised_data_stream_schema.h index 787e552..538873b 100644 --- a/nnforge/unsupervised_data_stream_schema.h +++ b/nnforge/unsupervised_data_stream_schema.h @@ -22,16 +22,12 @@ namespace nnforge { class unsupervised_data_stream_schema { - private: - unsupervised_data_stream_schema(); - public: static const boost::uuids::uuid unsupervised_data_stream_guid; - enum input_type - { - type_char = 1, - type_float = 2 - }; + private: + unsupervised_data_stream_schema(); + unsupervised_data_stream_schema(const unsupervised_data_stream_schema&); + unsupervised_data_stream_schema& operator =(const unsupervised_data_stream_schema&); }; } diff --git a/nnforge/unsupervised_data_stream_writer.cpp b/nnforge/unsupervised_data_stream_writer.cpp index a928a85..386b096 100644 --- a/nnforge/unsupervised_data_stream_writer.cpp +++ b/nnforge/unsupervised_data_stream_writer.cpp @@ -16,15 +16,18 @@ #include "unsupervised_data_stream_writer.h" +#include "neural_network_exception.h" + +#include + namespace nnforge { - unsupervised_data_stream_writer_base::unsupervised_data_stream_writer_base( + unsupervised_data_stream_writer::unsupervised_data_stream_writer( std::tr1::shared_ptr output_stream, - const layer_configuration_specific& input_configuration, - unsigned int type_code) - : out_stream(output_stream), entry_count(0) + const layer_configuration_specific& input_configuration) + : out_stream(output_stream), entry_count(0), type_code(neuron_data_type::type_unknown) { - out_stream->exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit); + out_stream->exceptions(std::ostream::failbit | std::ostream::badbit); input_neuron_count = input_configuration.get_neuron_count(); @@ -32,26 +35,74 @@ namespace nnforge input_configuration.write(*out_stream); + 
type_code_pos = out_stream->tellp(); out_stream->write(reinterpret_cast(&type_code), sizeof(type_code)); entry_count_pos = out_stream->tellp(); - out_stream->write(reinterpret_cast(&entry_count), sizeof(entry_count)); } - unsupervised_data_stream_writer_base::~unsupervised_data_stream_writer_base() + unsupervised_data_stream_writer::~unsupervised_data_stream_writer() { - // write entry count std::ostream::pos_type current_pos = out_stream->tellp(); + + // write type code + out_stream->seekp(type_code_pos); + if (type_code == neuron_data_type::type_unknown) + type_code = neuron_data_type::type_byte; + unsigned int t = static_cast(type_code); + out_stream->write(reinterpret_cast(&t), sizeof(t)); + + // write entry count + out_stream->seekp(entry_count_pos); + out_stream->write(reinterpret_cast(&entry_count), sizeof(entry_count)); + out_stream->seekp(current_pos); + out_stream->flush(); } - void unsupervised_data_stream_writer_base::write_output() + void unsupervised_data_stream_writer::write( + neuron_data_type::input_type type_code, + const void * input_neurons) + { + if (this->type_code == neuron_data_type::type_unknown) + { + this->type_code = type_code; + input_elem_size = neuron_data_type::get_input_size(this->type_code); + } + else if (this->type_code != type_code) + throw neural_network_exception((boost::format("Cannot write elements with different input type: %1% %2%") % this->type_code % type_code).str()); + + out_stream->write(reinterpret_cast(input_neurons), input_elem_size * input_neuron_count); + entry_count++; + } + + void unsupervised_data_stream_writer::write(const unsigned char * input_neurons) + { + if (type_code == neuron_data_type::type_unknown) + { + type_code = neuron_data_type::type_byte; + input_elem_size = neuron_data_type::get_input_size(type_code); + } + else if (type_code != neuron_data_type::type_byte) + throw neural_network_exception((boost::format("Cannot write elements with different input type: %1% %2%") % type_code % 
neuron_data_type::type_byte).str()); + + out_stream->write(reinterpret_cast(input_neurons), input_elem_size * input_neuron_count); + entry_count++; + } + + void unsupervised_data_stream_writer::write(const float * input_neurons) + { + if (type_code == neuron_data_type::type_unknown) + { + type_code = neuron_data_type::type_float; + input_elem_size = neuron_data_type::get_input_size(type_code); + } + else if (type_code != neuron_data_type::type_float) + throw neural_network_exception((boost::format("Cannot write elements with different input type: %1% %2%") % type_code % neuron_data_type::type_float).str()); + + out_stream->write(reinterpret_cast(input_neurons), input_elem_size * input_neuron_count); entry_count++; } } diff --git a/nnforge/unsupervised_data_stream_writer.h b/nnforge/unsupervised_data_stream_writer.h index b0a3594..60195b0 100644 --- a/nnforge/unsupervised_data_stream_writer.h +++ b/nnforge/unsupervised_data_stream_writer.h @@ -18,64 +18,48 @@ #include "unsupervised_data_stream_schema.h" #include "layer_configuration_specific.h" +#include "neuron_data_type.h" #include #include #include -#include namespace nnforge { - class unsupervised_data_stream_writer_base + class unsupervised_data_stream_writer { - protected: + public: + // The constructor modifies output_stream to throw exceptions in case of failure // The stream should be created with std::ios_base::binary flag - unsupervised_data_stream_writer_base( + unsupervised_data_stream_writer( std::tr1::shared_ptr output_stream, - const layer_configuration_specific& input_configuration, - unsigned int type_code); + const layer_configuration_specific& input_configuration); + + virtual ~unsupervised_data_stream_writer(); + + void write( + neuron_data_type::input_type type_code, + const void * input_neurons); - virtual ~unsupervised_data_stream_writer_base(); + void write(const float * input_neurons); - void write_output(); + void write(const unsigned char * input_neurons); + private: std::tr1::shared_ptr 
out_stream; unsigned int input_neuron_count; - private: - unsupervised_data_stream_writer_base(); + std::ostream::pos_type type_code_pos; + neuron_data_type::input_type type_code; + size_t input_elem_size; std::ostream::pos_type entry_count_pos; unsigned int entry_count; - }; - - template class unsupervised_data_stream_writer : public unsupervised_data_stream_writer_base - { - public: - // The constructor modifies output_stream to throw exceptions in case of failure - unsupervised_data_stream_writer( - std::tr1::shared_ptr output_stream, - const layer_configuration_specific& input_configuration) - : unsupervised_data_stream_writer_base(output_stream, input_configuration, data_type_code) - { - } - - virtual ~unsupervised_data_stream_writer() - { - } - - void write(const input_data_type * input_neurons) - { - out_stream->write(reinterpret_cast(input_neurons), sizeof(*input_neurons) * input_neuron_count); - - unsupervised_data_stream_writer_base::write_output(); - } private: unsupervised_data_stream_writer(const unsupervised_data_stream_writer&); unsupervised_data_stream_writer& operator =(const unsupervised_data_stream_writer&); }; - typedef unsupervised_data_stream_writer unsupervised_data_stream_writer_byte; - typedef unsupervised_data_stream_writer unsupervised_data_stream_writer_float; + typedef std::tr1::shared_ptr unsupervised_data_stream_writer_smart_ptr; } diff --git a/nnforge/validate_progress_network_data_pusher.cpp b/nnforge/validate_progress_network_data_pusher.cpp index d3f5f18..311e7c2 100644 --- a/nnforge/validate_progress_network_data_pusher.cpp +++ b/nnforge/validate_progress_network_data_pusher.cpp @@ -23,7 +23,7 @@ namespace nnforge { validate_progress_network_data_pusher::validate_progress_network_data_pusher( network_tester_smart_ptr tester, - supervised_data_reader_byte_smart_ptr reader, + supervised_data_reader_smart_ptr reader, testing_complete_result_set_visualizer_smart_ptr visualizer) : tester(tester) , reader(reader) diff --git 
a/nnforge/validate_progress_network_data_pusher.h b/nnforge/validate_progress_network_data_pusher.h index 48b052e..280fdf7 100644 --- a/nnforge/validate_progress_network_data_pusher.h +++ b/nnforge/validate_progress_network_data_pusher.h @@ -30,7 +30,7 @@ namespace nnforge public: validate_progress_network_data_pusher( network_tester_smart_ptr tester, - supervised_data_reader_byte_smart_ptr reader, + supervised_data_reader_smart_ptr reader, testing_complete_result_set_visualizer_smart_ptr visualizer); virtual ~validate_progress_network_data_pusher(); @@ -39,7 +39,7 @@ namespace nnforge protected: network_tester_smart_ptr tester; - supervised_data_reader_byte_smart_ptr reader; + supervised_data_reader_smart_ptr reader; output_neuron_value_set_smart_ptr actual_output_neuron_value_set; testing_complete_result_set_visualizer_smart_ptr visualizer; };