From 16e2ebc820601c6fad86339e42cd15f171b6c905 Mon Sep 17 00:00:00 2001 From: milakov Date: Wed, 15 May 2013 21:47:59 +0400 Subject: [PATCH] Preparing data command split into training and testing separate commands --- Settings.mk | 2 +- examples/gtsrb/README.md | 2 +- examples/gtsrb/gtsrb.cpp | 2 +- examples/gtsrb/gtsrb_toolset.cpp | 108 ++++++++++++++--------------- examples/gtsrb/gtsrb_toolset.h | 6 +- nnforge/neural_network_toolset.cpp | 15 +++- nnforge/neural_network_toolset.h | 4 +- 7 files changed, 70 insertions(+), 69 deletions(-) diff --git a/Settings.mk b/Settings.mk index bb735c0..f3b1417 100644 --- a/Settings.mk +++ b/Settings.mk @@ -21,6 +21,6 @@ LD_FLAGS_OPENMP=-fopenmp CUDA_FLAGS_COMMON=-use_fast_math CUDA_FLAGS_ARCH_FERMI=-gencode=arch=compute_20,code=sm_20 CUDA_FLAGS_ARCH_KEPLER=-gencode=arch=compute_30,code=sm_30 -gencode=arch=compute_35,code=\"sm_35,compute_35\" -CUDA_FLAGS_DEBUG_MODE=-g -G -lineinfo +CUDA_FLAGS_DEBUG_MODE=-g -lineinfo CUDA_FLAGS_RELEASE_MODE=-O3 -lineinfo diff --git a/examples/gtsrb/README.md b/examples/gtsrb/README.md index d5a3813..400c6bd 100644 --- a/examples/gtsrb/README.md +++ b/examples/gtsrb/README.md @@ -38,7 +38,7 @@ GT-final_test.csv in Final_Test should be the one with class IDs, from _Extended Train ----- - ./gtsrb prepare_data + ./gtsrb prepare_training_data ./gtsrb randomize_data ./gtsrb create ./gtsrb train_batch -N 10 diff --git a/examples/gtsrb/gtsrb.cpp b/examples/gtsrb/gtsrb.cpp index 6e7967e..c3a1ba0 100644 --- a/examples/gtsrb/gtsrb.cpp +++ b/examples/gtsrb/gtsrb.cpp @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) } catch (const std::exception& e) { - std::cout << e.what() << std::endl; + std::cout << "Exception caught: " << e.what() << std::endl; return 1; } diff --git a/examples/gtsrb/gtsrb_toolset.cpp b/examples/gtsrb/gtsrb_toolset.cpp index 7e374fc..058bb6a 100644 --- a/examples/gtsrb/gtsrb_toolset.cpp +++ b/examples/gtsrb/gtsrb_toolset.cpp @@ -45,74 +45,68 @@ gtsrb_toolset::~gtsrb_toolset() { } -void gtsrb_toolset::prepare_data() -{ - prepare_training_data(); - - prepare_validating_data(); -} - void gtsrb_toolset::prepare_training_data() { - boost::filesystem::path file_path = get_working_data_folder() / training_data_filename; - std::cout << "Writing data to " << file_path.string() << std::endl; - - std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); - nnforge::layer_configuration_specific input_configuration; - input_configuration.feature_map_count = is_color ? 3 : 1; - input_configuration.dimension_sizes.push_back(image_width); - input_configuration.dimension_sizes.push_back(image_height); - nnforge::layer_configuration_specific output_configuration; - output_configuration.feature_map_count = class_count; - output_configuration.dimension_sizes.push_back(1); - output_configuration.dimension_sizes.push_back(1); - nnforge::supervised_data_stream_writer writer( - file_with_data, - input_configuration, - output_configuration); - - for(unsigned int folder_id = 0; folder_id < class_count; ++folder_id) { - boost::filesystem::path subfolder_name = boost::filesystem::path("Final_Training") / "Images" / (boost::format("%|1$05d|") % folder_id).str(); - std::string annotation_file_name = (boost::format("GT-%|1$05d|.csv") % folder_id).str(); + boost::filesystem::path file_path = get_working_data_folder() / training_data_filename; + std::cout << "Writing data to " << file_path.string() << std::endl; + + std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); + nnforge::layer_configuration_specific input_configuration; + input_configuration.feature_map_count = is_color ? 3 : 1; + input_configuration.dimension_sizes.push_back(image_width); + input_configuration.dimension_sizes.push_back(image_height); + nnforge::layer_configuration_specific output_configuration; + output_configuration.feature_map_count = class_count; + output_configuration.dimension_sizes.push_back(1); + output_configuration.dimension_sizes.push_back(1); + nnforge::supervised_data_stream_writer writer( + file_with_data, + input_configuration, + output_configuration); + + for(unsigned int folder_id = 0; folder_id < class_count; ++folder_id) + { + boost::filesystem::path subfolder_name = boost::filesystem::path("Final_Training") / "Images" / (boost::format("%|1$05d|") % folder_id).str(); + std::string annotation_file_name = (boost::format("GT-%|1$05d|.csv") % folder_id).str(); + + write_folder( + writer, + subfolder_name, + annotation_file_name.c_str(), + true); + } + } + + { + boost::filesystem::path file_path = get_working_data_folder() / validating_data_filename; + std::cout << "Writing data to " << file_path.string() << std::endl; + + std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); + nnforge::layer_configuration_specific input_configuration; + input_configuration.feature_map_count = is_color ? 3 : 1; + input_configuration.dimension_sizes.push_back(image_width); + input_configuration.dimension_sizes.push_back(image_height); + nnforge::layer_configuration_specific output_configuration; + output_configuration.feature_map_count = class_count; + output_configuration.dimension_sizes.push_back(1); + output_configuration.dimension_sizes.push_back(1); + nnforge::supervised_data_stream_writer writer( + file_with_data, + input_configuration, + output_configuration); + + boost::filesystem::path subfolder_name = boost::filesystem::path("Final_Test") / "Images"; + std::string annotation_file_name = "GT-final_test.csv"; write_folder( writer, subfolder_name, annotation_file_name.c_str(), - true); + false); } } -void gtsrb_toolset::prepare_validating_data() -{ - boost::filesystem::path file_path = get_working_data_folder() / validating_data_filename; - std::cout << "Writing data to " << file_path.string() << std::endl; - - std::tr1::shared_ptr file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc)); - nnforge::layer_configuration_specific input_configuration; - input_configuration.feature_map_count = is_color ? 3 : 1; - input_configuration.dimension_sizes.push_back(image_width); - input_configuration.dimension_sizes.push_back(image_height); - nnforge::layer_configuration_specific output_configuration; - output_configuration.feature_map_count = class_count; - output_configuration.dimension_sizes.push_back(1); - output_configuration.dimension_sizes.push_back(1); - nnforge::supervised_data_stream_writer writer( - file_with_data, - input_configuration, - output_configuration); - - boost::filesystem::path subfolder_name = boost::filesystem::path("Final_Test") / "Images"; - std::string annotation_file_name = "GT-final_test.csv"; - - write_folder( - writer, - subfolder_name, - annotation_file_name.c_str(), - false); -} - void gtsrb_toolset::write_folder( nnforge::supervised_data_stream_writer& writer, const boost::filesystem::path& relative_subfolder_path, diff --git a/examples/gtsrb/gtsrb_toolset.h b/examples/gtsrb/gtsrb_toolset.h index 97ae944..7de9802 100644 --- a/examples/gtsrb/gtsrb_toolset.h +++ b/examples/gtsrb/gtsrb_toolset.h @@ -44,15 +44,11 @@ class gtsrb_toolset : public nnforge::neural_network_toolset virtual ~gtsrb_toolset(); protected: - virtual void prepare_data(); - virtual nnforge::network_schema_smart_ptr get_schema(); virtual std::map get_dropout_rate_map() const; - void prepare_training_data(); - - void prepare_validating_data(); + virtual void prepare_training_data(); void write_single_entry( nnforge::supervised_data_stream_writer& writer, diff --git a/nnforge/neural_network_toolset.cpp b/nnforge/neural_network_toolset.cpp index ec28e3e..a201084 100644 --- a/nnforge/neural_network_toolset.cpp +++ b/nnforge/neural_network_toolset.cpp @@ -87,9 +87,13 @@ namespace nnforge { create(); } - else if (!action.compare("prepare_data")) + else if (!action.compare("prepare_training_data")) { - prepare_data(); + prepare_training_data(); + } + else if (!action.compare("prepare_testing_data")) + { + prepare_testing_data(); } else if (!action.compare("randomize_data")) { @@ -149,6 +153,11 @@ namespace nnforge } } + void neural_network_toolset::prepare_testing_data() + { + throw std::runtime_error("This toolset doesn't implement preparing testing data"); + } + bool neural_network_toolset::parse(int argc, char* argv[]) { boost::filesystem::path config_file; @@ -161,7 +170,7 @@ namespace nnforge boost::program_options::options_description gener("Generic options"); gener.add_options() ("help", "produce help message") - ("action,A", boost::program_options::value(&action), "run action (info, create, prepare_data, randomize_data, test, test_batch, validate, validate_batch, validate_infinite, train, train_batch, snapshot, snapshot_invalid, profile_updater, profile_hessian)") + ("action,A", boost::program_options::value(&action), "run action (info, create, prepare_training_data, prepare_testing_data, randomize_data, test, test_batch, validate, validate_batch, validate_infinite, train, train_batch, snapshot, snapshot_invalid, profile_updater, profile_hessian)") ("config,C", boost::program_options::value(&config_file)->default_value(default_config_path), "path to the configuration file.") ; diff --git a/nnforge/neural_network_toolset.h b/nnforge/neural_network_toolset.h index db2abe7..aa9f1ed 100644 --- a/nnforge/neural_network_toolset.h +++ b/nnforge/neural_network_toolset.h @@ -51,7 +51,9 @@ namespace nnforge virtual std::vector get_int_options(); - virtual void prepare_data() = 0; + virtual void prepare_training_data() = 0; + + virtual void prepare_testing_data(); virtual network_schema_smart_ptr get_schema() = 0;