diff --git a/nnforge/cuda/network_updater_cuda.cu b/nnforge/cuda/network_updater_cuda.cu index 93ba618..cee015c 100644 --- a/nnforge/cuda/network_updater_cuda.cu +++ b/nnforge/cuda/network_updater_cuda.cu @@ -127,6 +127,8 @@ namespace nnforge { std::vector res; + entry_count_updated_in_profile_mode = 0; + unsigned int min_dropout_layer_id = testing_layer_count + 1; for(std::map::const_iterator it = layer_to_dropout_rate_map.begin(); it != layer_to_dropout_rate_map.end(); ++it) if (it->first < min_dropout_layer_id) @@ -250,7 +252,7 @@ namespace nnforge if (cuda_config->is_flush_required()) { cuda_safe_call(cudaEventRecord(data_processed_event, *command_stream)); - cudaEventQuery(data_processed_event); + cuda_safe_call(cudaEventQuery(data_processed_event)); } std::tr1::variate_generator > gen_random_offset( rnd::get_random_generator(), @@ -416,12 +418,12 @@ namespace nnforge if (((input_entry_id % 16) == 1) && cuda_config->is_flush_required()) { cuda_safe_call(cudaEventRecord(data_processed_event, *command_stream)); - cudaEventQuery(data_processed_event); + cuda_safe_call(cudaEventQuery(data_processed_event)); } } // for(unsigned int input_entry_id if (profile_mode) - entry_count_updated_in_profile_mode = entries_available_for_processing_count; + entry_count_updated_in_profile_mode += entries_available_for_processing_count; for(std::vector::iterator it = res.begin(); it != res.end(); ++it) (*it)->entry_count += entries_available_for_processing_count; @@ -429,7 +431,7 @@ namespace nnforge if (cuda_config->is_flush_required()) { cuda_safe_call(cudaEventRecord(data_processed_event, *command_stream)); - cudaEventQuery(data_processed_event); + cuda_safe_call(cudaEventQuery(data_processed_event)); } } // if (entries_available_for_processing_count > 0) diff --git a/nnforge/cuda/neural_network_cuda_exception.h b/nnforge/cuda/neural_network_cuda_exception.h index fea2121..6df034c 100644 --- a/nnforge/cuda/neural_network_cuda_exception.h +++ b/nnforge/cuda/neural_network_cuda_exception.h @@ -32,4 +32,4 @@ namespace nnforge } } -#define cuda_safe_call(callstr) {cudaError_t error_code = callstr; if (error_code != cudaSuccess) throw nnforge::cuda::neural_network_cuda_exception(error_code);} +#define cuda_safe_call(callstr) {cudaError_t error_code = callstr; if ((error_code != cudaSuccess) && (error_code != cudaErrorNotReady)) throw nnforge::cuda::neural_network_cuda_exception(error_code);} diff --git a/nnforge/neural_network_toolset.cpp b/nnforge/neural_network_toolset.cpp index a201084..cfcbc01 100644 --- a/nnforge/neural_network_toolset.cpp +++ b/nnforge/neural_network_toolset.cpp @@ -930,7 +930,7 @@ namespace nnforge std::cout << (boost::format("%|1$.1f| GFLOPs, %|2$.2f| seconds") % gflops % time_to_complete_seconds) << std::endl; } - std::cout << data[0]->get_stat() << std::endl; + std::cout << data[data.size()-1]->get_stat() << std::endl; } void neural_network_toolset::profile_hessian()