Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert TF changes on DEVEL #27710

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 102 additions & 78 deletions DQMServices/Core/src/ROOTFilePB.pb.cc

Large diffs are not rendered by default.

152 changes: 72 additions & 80 deletions DQMServices/Core/src/ROOTFilePB.pb.h

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions PhysicsTools/TensorFlow/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
<flags REM_CXXFLAGS="-Werror=sign-compare" />

<use name="tensorflow" />
<use name="tensorflow-cc" />

<use name="FWCore/Framework" />
Expand Down
2 changes: 1 addition & 1 deletion PhysicsTools/TensorFlow/interface/TensorFlow.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* TensorFlow interface helpers.
* Based on TensorFlow C++ API 1.13.
* Based on TensorFlow C++ API 1.3.
* For more info, see https://gitlab.cern.ch/mrieger/CMSSW-DNN.
*
* Author: Marcel Rieger
Expand Down
733 changes: 263 additions & 470 deletions PhysicsTools/TensorFlow/src/NTSession.cc

Large diffs are not rendered by default.

85 changes: 16 additions & 69 deletions PhysicsTools/TensorFlow/src/NTSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ limitations under the License.

/*
This file is an adaptation of the original direct_session.h file located at
https://raw.githubusercontent.com/tensorflow/tensorflow/v1.13.1/tensorflow/core/common_runtime/direct_session.h
https://github.com/tensorflow/tensorflow/blob/v1.6.0/tensorflow/core/common_runtime/direct_session.h
to meet the demands of the software environment developed and used by the CMS collaboration.

Changes:
Expand Down Expand Up @@ -51,7 +51,6 @@ to meet the demands of the software environment developed and used by the CMS co
#include "tensorflow/core/common_runtime/rendezvous_mgr.h"
#include "tensorflow/core/common_runtime/session_factory.h"
#include "tensorflow/core/framework/cancellation.h"
#include "tensorflow/core/framework/collective.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/session_state.h"
#include "tensorflow/core/framework/tensor.h"
Expand Down Expand Up @@ -124,17 +123,7 @@ namespace tensorflow {
cost_model_manager_.ExportCostModels(cost_models);
}

::tensorflow::Status MakeCallable(const CallableOptions& callable_options, CallableHandle* out_handle) override;
::tensorflow::Status RunCallable(CallableHandle handle,
const std::vector<Tensor>& feed_tensors,
std::vector<Tensor>* fetch_tensors,
RunMetadata* run_metadata) override;
::tensorflow::Status ReleaseCallable(CallableHandle handle) override;

private:
// For access to collective_graph_key_.
friend class NTSessionCollectiveTest;

// We create one executor and its dependent library runtime for
// every partition.
struct PerPartitionExecutorsAndLib {
Expand Down Expand Up @@ -166,10 +155,6 @@ namespace tensorflow {

DataTypeVector input_types;
DataTypeVector output_types;

CallableOptions callable_options;

int64 collective_graph_key = BuildGraphOptions::kNoCollectiveGraphKey;
};

// A FunctionInfo object is created for every unique set of feeds/fetches.
Expand All @@ -196,7 +181,6 @@ namespace tensorflow {
mutex mu_;
Status status GUARDED_BY(mu_);
IntraProcessRendezvous* rendez = nullptr;
std::unique_ptr<CollectiveExecutor::Handle> collective_executor;
std::unique_ptr<StepStatsCollector> collector;
Notification executors_done;
std::unordered_map<string, bool> pending_inputs; // true if fed
Expand Down Expand Up @@ -224,13 +208,12 @@ namespace tensorflow {
string handle;
std::unique_ptr<Graph> graph;
const DebugOptions& debug_options;
int64 collective_graph_key = BuildGraphOptions::kNoCollectiveGraphKey;
};

// Initializes the base execution state given the 'graph',
// if not already initialized.
Status MaybeInitializeExecutionState(const GraphDef& graph, bool* out_already_initialized)
EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_);

// Retrieves an already existing set of executors to run 'inputs' and
// 'outputs', or creates and caches them for future use.
Expand All @@ -240,13 +223,6 @@ namespace tensorflow {
ExecutorsAndKeys** executors_and_keys,
RunStateArgs* run_state_args);

// Creates a set of executors to run the subgraph defined by
// `callable_options`.
::tensorflow::Status CreateExecutors(const CallableOptions& callable_options,
std::unique_ptr<ExecutorsAndKeys>* out_executors_and_keys,
std::unique_ptr<FunctionInfo>* out_func_info,
RunStateArgs* run_state_args);

// Creates several graphs given the existing graph_def_ and the
// input feeds and fetches, given 'devices'. The graphs share a common
// function library 'flib_def'.
Expand All @@ -255,21 +231,9 @@ namespace tensorflow {
std::unique_ptr<FunctionLibraryDefinition>* flib_def,
RunStateArgs* run_state_args,
DataTypeVector* input_types,
DataTypeVector* output_types,
int64* collective_graph_key);

::tensorflow::Status RunInternal(int64 step_id,
const RunOptions& run_options,
CallFrameInterface* call_frame,
ExecutorsAndKeys* executors_and_keys,
RunMetadata* run_metadata);

// Returns whether inter-op execution uses a global pool or the input
// `run_options` requests being run on inter_op_thread_pool = 0 in case
// multiple pools are configured.
bool ShouldUseRunHandlerPool(const RunOptions& run_options) const;
DataTypeVector* output_types);

::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_);
::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_);

::tensorflow::Status ResourceHandleToInputTensor(const Tensor& resource_tensor, Tensor* retrieved_tensor);

Expand Down Expand Up @@ -306,18 +270,12 @@ namespace tensorflow {
return ::tensorflow::Status::OK();
}

::tensorflow::Status CheckGraphCreated(const char* method) {
mutex_lock l(graph_state_lock_);
if (!graph_created_) {
return errors::InvalidArgument("Session was not created with a graph before ", method, "!");
}
return ::tensorflow::Status::OK();
}

::tensorflow::Status CreateDebuggerState(const CallableOptions& options,
int64 global_step,
::tensorflow::Status CreateDebuggerState(const DebugOptions& debug_options,
int64 session_run_index,
int64 executor_step_index,
const std::vector<string>& input_names,
const std::vector<string>& output_names,
const std::vector<string>& target_names,
std::unique_ptr<DebuggerStateInterface>* debugger_state);

::tensorflow::Status DecorateAndPublishGraphForDebug(const DebugOptions& debug_options,
Expand All @@ -332,8 +290,10 @@ namespace tensorflow {
DeviceSet device_set_;

string session_handle_;
mutex graph_state_lock_;
bool graph_created_ GUARDED_BY(graph_state_lock_) = false;
bool graph_created_ GUARDED_BY(graph_def_lock_) = false;

mutex graph_def_lock_;
GraphDef graph_def_ GUARDED_BY(graph_def_lock_);

Status init_error_; // Set to an error if construction failed.

Expand All @@ -351,16 +311,6 @@ namespace tensorflow {
// same ExecutorsAndKey object.
std::unordered_map<string, std::shared_ptr<ExecutorsAndKeys>> executors_ GUARDED_BY(executor_lock_);

class RunCallableCallFrame;
struct Callable {
std::shared_ptr<ExecutorsAndKeys> executors_and_keys;
std::shared_ptr<FunctionInfo> function_info;
~Callable();
};
mutex callables_lock_;
int64 next_callable_handle_ GUARDED_BY(callables_lock_) = 0;
std::unordered_map<int64, Callable> callables_ GUARDED_BY(callables_lock_);

// Holds mappings from handle to partial run state.
std::unordered_map<string, std::unique_ptr<RunState>> partial_runs_ GUARDED_BY(executor_lock_);

Expand All @@ -369,16 +319,15 @@ namespace tensorflow {

NTSessionFactory* const factory_; // not owned
CancellationManager* cancellation_manager_;
std::unique_ptr<CollectiveExecutorMgrInterface> collective_executor_mgr_;

// Map of placed stateful nodes, i.e. nodes for which is_stateful()
// is true, such as "params" and "queue" nodes. Once placed these
// nodes can not be moved to a different device. Maps node names to
// device names.
std::unordered_map<string, string> stateful_placements_ GUARDED_BY(graph_state_lock_);
std::unordered_map<string, string> stateful_placements_ GUARDED_BY(graph_def_lock_);

// Execution_state; used when placing the entire graph.
std::unique_ptr<GraphExecutionState> execution_state_ GUARDED_BY(graph_state_lock_);
std::unique_ptr<GraphExecutionState> execution_state_ GUARDED_BY(graph_def_lock_);

// The function library, before any rewrites or optimizations have been
// performed. In particular, CreateGraphs() may need to modify the function
Expand All @@ -393,7 +342,7 @@ namespace tensorflow {
std::atomic<int64> edge_name_counter_ = {0};
std::atomic<int64> handle_name_counter_ = {0};

// For generating step ids that are unique across this sessions.
// For generating step ids that are unique across all sessions.
static std::atomic_int_fast64_t step_id_counter_;

// Global timeout for all blocking operations in this session.
Expand All @@ -402,9 +351,7 @@ namespace tensorflow {
// Manages all the cost models for the graphs executed in this session.
CostModelManager cost_model_manager_;

// For testing collective graph key generation.
mutex collective_graph_key_lock_;
int64 collective_graph_key_ GUARDED_BY(collective_graph_key_lock_) = -1;
Executor::Args::NodeOutputsCallback node_outputs_callback_ = nullptr;

TF_DISALLOW_COPY_AND_ASSIGN(NTSession);

Expand Down
Loading