Skip to content

Commit

Permalink
- Added ability to keep and process input data with input neuron type 'float'
Browse files Browse the repository at this point in the history

- Dropped feature: scaling input data when processing
  • Loading branch information
milakov committed Mar 3, 2013
1 parent f088678 commit e7f9c01
Show file tree
Hide file tree
Showing 65 changed files with 1,066 additions and 1,191 deletions.
4 changes: 2 additions & 2 deletions Settings.mk
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ OPENCV_PATH=/usr/local
CUDA_PATH=/usr/local/cuda
NVCC=nvcc
NNFORGE_PATH=../..
NNFORGE_INPUT_DATA_PATH=~/nnforge/input_data
NNFORGE_WORKING_DATA_PATH=~/nnforge/working_data
NNFORGE_INPUT_DATA_PATH=/home/max/nnforge/input_data
NNFORGE_WORKING_DATA_PATH=/home/max/nnforge/working_data

BOOST_LIBS=-lboost_regex-mt -lboost_chrono-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_random-mt -lboost_system-mt -lboost_date_time-mt
OPENCV_LIBS=-lopencv_highgui -lopencv_imgproc -lopencv_core
Expand Down
20 changes: 14 additions & 6 deletions examples/gtsrb/gtsrb_toolset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ void gtsrb_toolset::prepare_data()

void gtsrb_toolset::prepare_training_data()
{
std::tr1::shared_ptr<std::ofstream> file_with_data(new boost::filesystem::ofstream(get_working_data_folder() / training_data_filename, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
boost::filesystem::path file_path = get_working_data_folder() / training_data_filename;
std::cout << "Writing data to " << file_path.string() << std::endl;

std::tr1::shared_ptr<std::ofstream> file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
nnforge::layer_configuration_specific input_configuration;
input_configuration.feature_map_count = is_color ? 3 : 1;
input_configuration.dimension_sizes.push_back(image_width);
Expand All @@ -63,7 +66,7 @@ void gtsrb_toolset::prepare_training_data()
output_configuration.feature_map_count = class_count;
output_configuration.dimension_sizes.push_back(1);
output_configuration.dimension_sizes.push_back(1);
nnforge::supervised_data_stream_writer_byte writer(
nnforge::supervised_data_stream_writer writer(
file_with_data,
input_configuration,
output_configuration);
Expand All @@ -83,7 +86,10 @@ void gtsrb_toolset::prepare_training_data()

void gtsrb_toolset::prepare_validating_data()
{
std::tr1::shared_ptr<std::ofstream> file_with_data(new boost::filesystem::ofstream(get_working_data_folder() / validating_data_filename, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
boost::filesystem::path file_path = get_working_data_folder() / validating_data_filename;
std::cout << "Writing data to " << file_path.string() << std::endl;

std::tr1::shared_ptr<std::ofstream> file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
nnforge::layer_configuration_specific input_configuration;
input_configuration.feature_map_count = is_color ? 3 : 1;
input_configuration.dimension_sizes.push_back(image_width);
Expand All @@ -92,7 +98,7 @@ void gtsrb_toolset::prepare_validating_data()
output_configuration.feature_map_count = class_count;
output_configuration.dimension_sizes.push_back(1);
output_configuration.dimension_sizes.push_back(1);
nnforge::supervised_data_stream_writer_byte writer(
nnforge::supervised_data_stream_writer writer(
file_with_data,
input_configuration,
output_configuration);
Expand All @@ -108,14 +114,16 @@ void gtsrb_toolset::prepare_validating_data()
}

void gtsrb_toolset::write_folder(
nnforge::supervised_data_stream_writer_byte& writer,
nnforge::supervised_data_stream_writer& writer,
const boost::filesystem::path& relative_subfolder_path,
const char * annotation_file_name,
bool jitter)
{
boost::filesystem::path subfolder_path = get_input_data_folder() / relative_subfolder_path;
boost::filesystem::path annotation_file_path = subfolder_path / annotation_file_name;

std::cout << "Reading input data from " << subfolder_path.string() << std::endl;

boost::filesystem::ifstream file_input(annotation_file_path, std::ios_base::in);

nnforge::random_generator generator = nnforge::rnd::get_random_generator();
Expand Down Expand Up @@ -188,7 +196,7 @@ void gtsrb_toolset::write_folder(
}

void gtsrb_toolset::write_signle_entry(
nnforge::supervised_data_stream_writer_byte& writer,
nnforge::supervised_data_stream_writer& writer,
const boost::filesystem::path& absolute_file_path,
unsigned int class_id,
unsigned int roi_top_left_x,
Expand Down
4 changes: 2 additions & 2 deletions examples/gtsrb/gtsrb_toolset.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class gtsrb_toolset : public nnforge::neural_network_toolset
void prepare_validating_data();

void write_signle_entry(
nnforge::supervised_data_stream_writer_byte& writer,
nnforge::supervised_data_stream_writer& writer,
const boost::filesystem::path& absolute_file_path,
unsigned int class_id,
unsigned int roi_top_left_x,
Expand All @@ -68,7 +68,7 @@ class gtsrb_toolset : public nnforge::neural_network_toolset
float brightness_shift = 0.0F);

void write_folder(
nnforge::supervised_data_stream_writer_byte& writer,
nnforge::supervised_data_stream_writer& writer,
const boost::filesystem::path& relative_subfolder_path,
const char * annotation_file_name,
bool jitter);
Expand Down
10 changes: 10 additions & 0 deletions nnforge/cuda/cuda_linear_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ namespace nnforge
return (unsigned char *)(get_buf());
}

// Reinterpret the buffer's raw storage as a mutable uchar4* (4 packed bytes
// per element), following the same pattern as the other conversion operators
// (unsigned char*, unsigned int*) that wrap get_buf().
cuda_linear_buffer::operator uchar4 *()
{
return (uchar4 *)(get_buf());
}

// Const overload: reinterpret the buffer's raw storage as a read-only uchar4*.
// Cast directly to const uchar4* — the previous cast to non-const uchar4*
// discarded the const qualifier from the const-qualified buffer before
// implicitly re-adding it, which is needless const-stripping.
cuda_linear_buffer::operator const uchar4 *() const
{
return (const uchar4 *)(get_buf());
}

cuda_linear_buffer::operator unsigned int *()
{
return (unsigned int *)(get_buf());
Expand Down
4 changes: 4 additions & 0 deletions nnforge/cuda/cuda_linear_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ namespace nnforge

operator const unsigned char *() const;

operator uchar4 *();

operator const uchar4 *() const;

operator unsigned int *();

operator const unsigned int *() const;
Expand Down
2 changes: 1 addition & 1 deletion nnforge/cuda/cuda_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ namespace nnforge
int total_thread_count,
int divisible)
{
int initial_threadblock_count = total_thread_count / 256;
int initial_threadblock_count = std::max<int>(total_thread_count / 256, 1);
int minimum_threadblock_count = cuda_config.multiprocessor_count * 8;

if (initial_threadblock_count >= minimum_threadblock_count)
Expand Down
87 changes: 39 additions & 48 deletions nnforge/cuda/hessian_calculator_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,20 @@
#include <boost/format.hpp>

__global__ void convert_compacted_to_raw_hess_kernel(
const unsigned char * __restrict input,
float * __restrict output,
const float * __restrict scale_addition,
const float * __restrict scale_multiplication,
int elem_count_per_feature_map,
int feature_map_count,
int entry_count)
const uchar4 * __restrict input,
float4 * __restrict output,
int elem_count)
{
int elem_id_inside_feature_map = blockIdx.x * blockDim.x + threadIdx.x;
int feature_map_id = blockIdx.y * blockDim.y + threadIdx.y;
int entry_id = blockIdx.z * blockDim.z + threadIdx.z;
bool in_bounds = (entry_id < entry_count) && (elem_id_inside_feature_map < elem_count_per_feature_map) && (feature_map_id < feature_map_count);
if (in_bounds)
int elem_id = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x;
if (elem_id < elem_count)
{
int offset = elem_count_per_feature_map * (entry_id * feature_map_count + feature_map_id) + elem_id_inside_feature_map;
unsigned char val = input[offset];
float converted_val = ((val * (1.0F / 255.0F)) + scale_addition[feature_map_id]) * scale_multiplication[feature_map_id];
output[offset] = converted_val;
uchar4 inp = input[elem_id];
float4 val;
val.x = inp.x * (1.0F / 255.0F);
val.y = inp.y * (1.0F / 255.0F);
val.z = inp.z * (1.0F / 255.0F);
val.w = inp.w * (1.0F / 255.0F);
output[elem_id] = val;
}
}

Expand All @@ -66,9 +62,8 @@ namespace nnforge
{
hessian_calculator_cuda::hessian_calculator_cuda(
network_schema_smart_ptr schema,
const_data_scale_params_smart_ptr scale_params,
cuda_running_configuration_const_smart_ptr cuda_config)
: hessian_calculator(schema, scale_params)
: hessian_calculator(schema)
, cuda_config(cuda_config)
{
const const_layer_list& layer_list = *schema;
Expand Down Expand Up @@ -111,7 +106,7 @@ namespace nnforge
}

network_data_smart_ptr hessian_calculator_cuda::actual_get_hessian(
supervised_data_reader_byte& reader,
supervised_data_reader& reader,
network_data_smart_ptr data,
unsigned int hessian_entry_to_process_count)
{
Expand All @@ -125,7 +120,8 @@ namespace nnforge
unsigned int input_neuron_count = input_configuration.get_neuron_count();
unsigned int input_neuron_count_per_feature_map = input_configuration.get_neuron_count_per_feature_map();
unsigned int output_neuron_count = output_configuration.get_neuron_count();
unsigned int input_feature_map_count = input_configuration.feature_map_count;
neuron_data_type::input_type type_code = reader.get_input_type();
size_t input_neuron_elem_size = reader.get_input_neuron_elem_size();

std::vector<std::vector<const_cuda_linear_buffer_device_smart_ptr> > net_data = enqueue_get_data(data, *command_stream);
std::vector<std::vector<const_cuda_linear_buffer_device_smart_ptr> > net_data_squared = enqueue_get_data_squared(net_data, *command_stream);
Expand All @@ -134,8 +130,8 @@ namespace nnforge
buffer_cuda_size_configuration buffers_config;
update_buffers_configuration(buffers_config);

buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input
buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(unsigned char)); // input
buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input
buffers_config.add_per_entry_buffer(input_neuron_count * input_neuron_elem_size); // input
buffers_config.add_per_entry_buffer(input_neuron_count * sizeof(float)); // converted input
buffers_config.add_per_entry_buffer(output_neuron_count * sizeof(float)); // initial error

Expand All @@ -155,8 +151,8 @@ namespace nnforge

cuda_linear_buffer_device_smart_ptr input_buf[2] =
{
cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))),
cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(unsigned char))),
cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)),
cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * input_neuron_elem_size)),
};

cuda_linear_buffer_device_smart_ptr input_converted_buf(new cuda_linear_buffer_device(input_neuron_count * max_entry_count * sizeof(float)));
Expand Down Expand Up @@ -190,7 +186,7 @@ namespace nnforge
output_errors = all_buffers.input_errors_buffer;
}

cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * sizeof(unsigned char)));
cuda_linear_buffer_host_smart_ptr input_host_buf(new cuda_linear_buffer_host(input_neuron_count * max_entry_count * input_neuron_elem_size));
unsigned char * input = *input_host_buf;

unsigned int current_data_slot = 0;
Expand All @@ -209,21 +205,27 @@ namespace nnforge
if (entries_available_for_processing_count > 0)
{
// Convert input
if (type_code == neuron_data_type::type_byte)
{
std::pair<dim3, dim3> convert_compacted_to_raw_2d_surf_kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access(
int elem_count = (input_neuron_count * entries_available_for_processing_count + 3) / 4;
std::pair<dim3, dim3> kernel_dims = cuda_util::get_grid_and_threadblock_sizes_sequential_access(
*cuda_config,
input_neuron_count_per_feature_map,
input_feature_map_count,
entries_available_for_processing_count);
convert_compacted_to_raw_hess_kernel<<<convert_compacted_to_raw_2d_surf_kernel_dims.first, convert_compacted_to_raw_2d_surf_kernel_dims.second, 0, *command_stream>>>(
elem_count);
convert_compacted_to_raw_hess_kernel<<<kernel_dims.first, kernel_dims.second, 0, *command_stream>>>(
*input_buf[current_command_slot],
*input_converted_buf,
*scale_addition,
*scale_multiplication,
input_neuron_count_per_feature_map,
input_feature_map_count,
entries_available_for_processing_count);
elem_count);
}
else if (type_code == neuron_data_type::type_float)
{
cuda_safe_call(cudaMemcpyAsync(
*input_converted_buf,
*input_buf[current_command_slot],
input_neuron_count * entries_available_for_processing_count * sizeof(float),
cudaMemcpyDeviceToDevice,
*command_stream));
}
else throw neural_network_exception((boost::format("actual_get_hessian cannot handle input neurons of type %1%") % type_code).str());

// Run ann
{
Expand Down Expand Up @@ -313,7 +315,7 @@ namespace nnforge
unsigned int entries_to_read_count = std::min<unsigned int>(max_entry_count, entries_available_for_copy_in_count);
while(entries_read_count < entries_to_read_count)
{
bool entry_read = reader.read(input + (input_neuron_count * entries_read_count), 0);
bool entry_read = reader.read(input + (input_neuron_count * entries_read_count * input_neuron_elem_size), 0);

if (!entry_read)
break;
Expand All @@ -323,7 +325,7 @@ namespace nnforge
cuda_safe_call(cudaMemcpyAsync(
*(input_buf[current_data_slot]),
input,
entries_read_count * input_neuron_count * sizeof(unsigned char),
entries_read_count * input_neuron_count * input_neuron_elem_size,
cudaMemcpyHostToDevice,
*data_stream));
}
Expand Down Expand Up @@ -367,14 +369,6 @@ namespace nnforge
*(it_conf + 1),
(it_conf > layer_config_list.begin() + testing_layer_count)));
}

scale_multiplication = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(
&(*current_scale_params->multiplication_list.begin()),
current_scale_params->multiplication_list.size() * sizeof(float)));

scale_addition = cuda_linear_buffer_device_smart_ptr(new cuda_linear_buffer_device(
&(*current_scale_params->addition_list.begin()),
current_scale_params->addition_list.size() * sizeof(float)));
}

std::vector<std::vector<const_cuda_linear_buffer_device_smart_ptr> > hessian_calculator_cuda::enqueue_get_data(
Expand Down Expand Up @@ -500,9 +494,6 @@ namespace nnforge

void hessian_calculator_cuda::update_buffers_configuration(buffer_cuda_size_configuration& buffer_configuration) const
{
buffer_configuration.add_constant_buffer(scale_addition->get_size());
buffer_configuration.add_constant_buffer(scale_multiplication->get_size());

for(std::vector<std::vector<const_cuda_linear_buffer_device_smart_ptr> >::const_iterator it = testing_schema_data.begin(); it != testing_schema_data.end(); ++it)
for(std::vector<const_cuda_linear_buffer_device_smart_ptr>::const_iterator it2 = it->begin(); it2 != it->end(); ++it2)
buffer_configuration.add_constant_buffer((*it2)->get_size());
Expand Down
6 changes: 1 addition & 5 deletions nnforge/cuda/hessian_calculator_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,14 @@ namespace nnforge
public:
hessian_calculator_cuda(
network_schema_smart_ptr schema,
const_data_scale_params_smart_ptr scale_params,
cuda_running_configuration_const_smart_ptr cuda_config);

virtual ~hessian_calculator_cuda();

protected:
// schema, data and reader are guaranteed to be compatible
virtual network_data_smart_ptr actual_get_hessian(
supervised_data_reader_byte& reader,
supervised_data_reader& reader,
network_data_smart_ptr data,
unsigned int hessian_entry_to_process_count);

Expand Down Expand Up @@ -86,9 +85,6 @@ namespace nnforge
unsigned int testing_layer_count;
const_layer_list::const_iterator start_layer_nonempty_weights_iterator;

const_cuda_linear_buffer_device_smart_ptr scale_addition;
const_cuda_linear_buffer_device_smart_ptr scale_multiplication;

const_layer_testing_schema_list testing_schemas;
std::vector<std::vector<const_cuda_linear_buffer_device_smart_ptr> > testing_schema_data;
std::vector<layer_tester_cuda_smart_ptr> tester_list;
Expand Down
6 changes: 2 additions & 4 deletions nnforge/cuda/hessian_calculator_cuda_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,9 @@ namespace nnforge
{
}

hessian_calculator_smart_ptr hessian_calculator_cuda_factory::create(
network_schema_smart_ptr schema,
const_data_scale_params_smart_ptr scale_params) const
hessian_calculator_smart_ptr hessian_calculator_cuda_factory::create(network_schema_smart_ptr schema) const
{
return hessian_calculator_smart_ptr(new hessian_calculator_cuda(schema, scale_params, cuda_config));
return hessian_calculator_smart_ptr(new hessian_calculator_cuda(schema, cuda_config));
}
}
}
4 changes: 1 addition & 3 deletions nnforge/cuda/hessian_calculator_cuda_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ namespace nnforge

virtual ~hessian_calculator_cuda_factory();

virtual hessian_calculator_smart_ptr create(
network_schema_smart_ptr schema,
const_data_scale_params_smart_ptr scale_params) const;
virtual hessian_calculator_smart_ptr create(network_schema_smart_ptr schema) const;

protected:
cuda_running_configuration_const_smart_ptr cuda_config;
Expand Down
Loading

0 comments on commit e7f9c01

Please sign in to comment.