Revert TF changes on DEVEL
mrodozov committed Aug 7, 2019
1 parent f7de3c7 commit dab137c
Showing 15 changed files with 815 additions and 1,337 deletions.
180 changes: 102 additions & 78 deletions DQMServices/Core/src/ROOTFilePB.pb.cc

Large diffs are not rendered by default.

152 changes: 72 additions & 80 deletions DQMServices/Core/src/ROOTFilePB.pb.h

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions PhysicsTools/TensorFlow/BuildFile.xml
@@ -1,6 +1,3 @@
<flags REM_CXXFLAGS="-Werror=sign-compare" />

<use name="tensorflow" />
<use name="tensorflow-cc" />

<use name="FWCore/Framework" />
2 changes: 1 addition & 1 deletion PhysicsTools/TensorFlow/interface/TensorFlow.h
@@ -1,6 +1,6 @@
/*
* TensorFlow interface helpers.
* Based on TensorFlow C++ API 1.13.
* Based on TensorFlow C++ API 1.3.
* For more info, see https://gitlab.cern.ch/mrieger/CMSSW-DNN.
*
* Author: Marcel Rieger
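For context, the header touched here is the thin helper layer around the TensorFlow C++ API that CMSSW code calls into. A minimal usage sketch of that interface follows, assuming the CMSSW-DNN style helpers (loadGraphDef, createSession, run, closeSession) and placeholder names for the graph file and its input/output nodes; exact signatures may differ between the TF 1.3 and TF 1.13 variants of the header.

// Minimal sketch of the PhysicsTools/TensorFlow helper interface (assumed usage;
// the file path "model.pb" and the node names "input"/"output" are placeholders).
#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"

#include <vector>

int main() {
  // Load a frozen graph and open a session on it.
  tensorflow::GraphDef* graphDef = tensorflow::loadGraphDef("model.pb");
  tensorflow::Session* session = tensorflow::createSession(graphDef);

  // Fill a 1x10 float input tensor.
  tensorflow::Tensor input(tensorflow::DT_FLOAT, {1, 10});
  for (int i = 0; i < 10; i++)
    input.matrix<float>()(0, i) = float(i);

  // Evaluate the graph: feed "input", fetch "output".
  std::vector<tensorflow::Tensor> outputs;
  tensorflow::run(session, {{"input", input}}, {"output"}, &outputs);

  // Clean up.
  tensorflow::closeSession(session);
  delete graphDef;
  return 0;
}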
733 changes: 263 additions & 470 deletions PhysicsTools/TensorFlow/src/NTSession.cc

Large diffs are not rendered by default.

85 changes: 16 additions & 69 deletions PhysicsTools/TensorFlow/src/NTSession.h
@@ -15,7 +15,7 @@ limitations under the License.

/*
This file is an adaptation of the original direct_session.h file located at
https://raw.githubusercontent.com/tensorflow/tensorflow/v1.13.1/tensorflow/core/common_runtime/direct_session.h
https://github.com/tensorflow/tensorflow/blob/v1.6.0/tensorflow/core/common_runtime/direct_session.h
to meet the demands of the software environment developed and used by the CMS collaboration.
Changes:
@@ -51,7 +51,6 @@ to meet the demands of the software environment developed and used by the CMS collaboration.
#include "tensorflow/core/common_runtime/rendezvous_mgr.h"
#include "tensorflow/core/common_runtime/session_factory.h"
#include "tensorflow/core/framework/cancellation.h"
#include "tensorflow/core/framework/collective.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/session_state.h"
#include "tensorflow/core/framework/tensor.h"
@@ -124,17 +123,7 @@ namespace tensorflow {
cost_model_manager_.ExportCostModels(cost_models);
}

::tensorflow::Status MakeCallable(const CallableOptions& callable_options, CallableHandle* out_handle) override;
::tensorflow::Status RunCallable(CallableHandle handle,
const std::vector<Tensor>& feed_tensors,
std::vector<Tensor>* fetch_tensors,
RunMetadata* run_metadata) override;
::tensorflow::Status ReleaseCallable(CallableHandle handle) override;

private:
// For access to collective_graph_key_.
friend class NTSessionCollectiveTest;

// We create one executor and its dependent library runtime for
// every partition.
struct PerPartitionExecutorsAndLib {
@@ -166,10 +155,6 @@ namespace tensorflow {

DataTypeVector input_types;
DataTypeVector output_types;

CallableOptions callable_options;

int64 collective_graph_key = BuildGraphOptions::kNoCollectiveGraphKey;
};

// A FunctionInfo object is created for every unique set of feeds/fetches.
@@ -196,7 +181,6 @@ namespace tensorflow {
mutex mu_;
Status status GUARDED_BY(mu_);
IntraProcessRendezvous* rendez = nullptr;
std::unique_ptr<CollectiveExecutor::Handle> collective_executor;
std::unique_ptr<StepStatsCollector> collector;
Notification executors_done;
std::unordered_map<string, bool> pending_inputs; // true if fed
@@ -224,13 +208,12 @@ namespace tensorflow {
string handle;
std::unique_ptr<Graph> graph;
const DebugOptions& debug_options;
int64 collective_graph_key = BuildGraphOptions::kNoCollectiveGraphKey;
};

// Initializes the base execution state given the 'graph',
// if not already initialized.
Status MaybeInitializeExecutionState(const GraphDef& graph, bool* out_already_initialized)
EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_);

// Retrieves an already existing set of executors to run 'inputs' and
// 'outputs', or creates and caches them for future use.
@@ -240,13 +223,6 @@
ExecutorsAndKeys** executors_and_keys,
RunStateArgs* run_state_args);

// Creates a set of executors to run the subgraph defined by
// `callable_options`.
::tensorflow::Status CreateExecutors(const CallableOptions& callable_options,
std::unique_ptr<ExecutorsAndKeys>* out_executors_and_keys,
std::unique_ptr<FunctionInfo>* out_func_info,
RunStateArgs* run_state_args);

// Creates several graphs given the existing graph_def_ and the
// input feeds and fetches, given 'devices'. The graphs share a common
// function library 'flib_def'.
@@ -255,21 +231,9 @@
std::unique_ptr<FunctionLibraryDefinition>* flib_def,
RunStateArgs* run_state_args,
DataTypeVector* input_types,
DataTypeVector* output_types,
int64* collective_graph_key);

::tensorflow::Status RunInternal(int64 step_id,
const RunOptions& run_options,
CallFrameInterface* call_frame,
ExecutorsAndKeys* executors_and_keys,
RunMetadata* run_metadata);

// Returns whether inter-op execution uses a global pool or the input
// `run_options` requests being run on inter_op_thread_pool = 0 in case
// multiple pools are configured.
bool ShouldUseRunHandlerPool(const RunOptions& run_options) const;
DataTypeVector* output_types);

::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_);

::tensorflow::Status ResourceHandleToInputTensor(const Tensor& resource_tensor, Tensor* retrieved_tensor);

@@ -306,18 +270,12 @@ namespace tensorflow {
return ::tensorflow::Status::OK();
}

::tensorflow::Status CheckGraphCreated(const char* method) {
mutex_lock l(graph_state_lock_);
if (!graph_created_) {
return errors::InvalidArgument("Session was not created with a graph before ", method, "!");
}
return ::tensorflow::Status::OK();
}

::tensorflow::Status CreateDebuggerState(const CallableOptions& options,
int64 global_step,
::tensorflow::Status CreateDebuggerState(const DebugOptions& debug_options,
int64 session_run_index,
int64 executor_step_index,
const std::vector<string>& input_names,
const std::vector<string>& output_names,
const std::vector<string>& target_names,
std::unique_ptr<DebuggerStateInterface>* debugger_state);

::tensorflow::Status DecorateAndPublishGraphForDebug(const DebugOptions& debug_options,
@@ -332,8 +290,10 @@ namespace tensorflow {
DeviceSet device_set_;

string session_handle_;
mutex graph_state_lock_;
bool graph_created_ GUARDED_BY(graph_state_lock_) = false;
bool graph_created_ GUARDED_BY(graph_def_lock_) = false;

mutex graph_def_lock_;
GraphDef graph_def_ GUARDED_BY(graph_def_lock_);

Status init_error_; // Set to an error if construction failed.

@@ -351,16 +311,6 @@
// same ExecutorsAndKey object.
std::unordered_map<string, std::shared_ptr<ExecutorsAndKeys>> executors_ GUARDED_BY(executor_lock_);

class RunCallableCallFrame;
struct Callable {
std::shared_ptr<ExecutorsAndKeys> executors_and_keys;
std::shared_ptr<FunctionInfo> function_info;
~Callable();
};
mutex callables_lock_;
int64 next_callable_handle_ GUARDED_BY(callables_lock_) = 0;
std::unordered_map<int64, Callable> callables_ GUARDED_BY(callables_lock_);

// Holds mappings from handle to partial run state.
std::unordered_map<string, std::unique_ptr<RunState>> partial_runs_ GUARDED_BY(executor_lock_);

@@ -369,16 +319,15 @@

NTSessionFactory* const factory_; // not owned
CancellationManager* cancellation_manager_;
std::unique_ptr<CollectiveExecutorMgrInterface> collective_executor_mgr_;

// Map of placed stateful nodes, i.e. nodes for which is_stateful()
// is true, such as "params" and "queue" nodes. Once placed these
// nodes can not be moved to a different device. Maps node names to
// device names.
std::unordered_map<string, string> stateful_placements_ GUARDED_BY(graph_state_lock_);
std::unordered_map<string, string> stateful_placements_ GUARDED_BY(graph_def_lock_);

// Execution_state; used when placing the entire graph.
std::unique_ptr<GraphExecutionState> execution_state_ GUARDED_BY(graph_state_lock_);
std::unique_ptr<GraphExecutionState> execution_state_ GUARDED_BY(graph_def_lock_);

// The function library, before any rewrites or optimizations have been
// performed. In particular, CreateGraphs() may need to modify the function
@@ -393,7 +342,7 @@ namespace tensorflow {
std::atomic<int64> edge_name_counter_ = {0};
std::atomic<int64> handle_name_counter_ = {0};

// For generating step ids that are unique across this sessions.
// For generating step ids that are unique across all sessions.
static std::atomic_int_fast64_t step_id_counter_;

// Global timeout for all blocking operations in this session.
@@ -402,9 +351,7 @@
// Manages all the cost models for the graphs executed in this session.
CostModelManager cost_model_manager_;

// For testing collective graph key generation.
mutex collective_graph_key_lock_;
int64 collective_graph_key_ GUARDED_BY(collective_graph_key_lock_) = -1;
Executor::Args::NodeOutputsCallback node_outputs_callback_ = nullptr;

TF_DISALLOW_COPY_AND_ASSIGN(NTSession);

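Taken together, the NTSession.h changes above strip out the Callable interface (MakeCallable/RunCallable/ReleaseCallable), the collective-executor members, and the graph_state_lock_ naming introduced for the TF 1.13 adaptation, restoring the older graph_def_lock_ layout and leaving the plain Run() entry point. The sketch below contrasts the two calling styles against the generic tensorflow::Session API; it is illustrative only (session, "x" and "y" are placeholders), not code from this commit.

// Illustrative only: the two Session calling styles affected by this revert.
// "session", "x" and "y" stand in for an existing session and real node names.
#include "tensorflow/core/public/session.h"

#include <vector>

void runBothStyles(tensorflow::Session* session, const tensorflow::Tensor& x) {
  std::vector<tensorflow::Tensor> outputs;

  // Classic path, the only one NTSession keeps after the revert:
  // feed "x", fetch "y", no target nodes.
  TF_CHECK_OK(session->Run({{"x", x}}, {"y"}, {}, &outputs));

  // Callable path (TF >= 1.8), dropped from NTSession by this revert:
  // build CallableOptions once, then run repeatedly by handle.
  tensorflow::CallableOptions opts;
  opts.add_feed("x");
  opts.add_fetch("y");
  tensorflow::Session::CallableHandle handle;
  TF_CHECK_OK(session->MakeCallable(opts, &handle));
  TF_CHECK_OK(session->RunCallable(handle, {x}, &outputs, nullptr));
  TF_CHECK_OK(session->ReleaseCallable(handle));
}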
