Merged PR 1072: - Remove construction of MLValue name -> idx mapping from execution

- Remove construction of MLValue name -> idx mapping from execution
  frame to inference session since it needs to be done per session only.
- Minor change in Executor interface (eliminate one heap allocation).

Related work items: #60
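
To make the intent of the change concrete, here is a minimal sketch of a session-level name -> index map that is built once and then only queried at execution time. This is not the actual Lotus SessionState: the class name, the AddOrGetIdx helper, and the bool return value (standing in for Common::Status) are illustrative assumptions, while GetMLValueIdx and GetMaxMLValueIdx mirror the calls visible in the diff below.

#include <string>
#include <unordered_map>

// Sketch only: a per-session registry mapping MLValue names to dense indices.
class SessionStateSketch {
 public:
  // Hypothetical helper, called once per session while walking the graph:
  // returns the existing index for a name, or assigns the next free one.
  int AddOrGetIdx(const std::string& name) {
    const auto it = name_to_idx_.find(name);
    if (it != name_to_idx_.end()) return it->second;
    const int idx = static_cast<int>(name_to_idx_.size());
    name_to_idx_.emplace(name, idx);
    return idx;
  }

  // Lookup used on every Run(); bool stands in for Common::Status here.
  bool GetMLValueIdx(const std::string& name, int* idx) const {
    const auto it = name_to_idx_.find(name);
    if (it == name_to_idx_.end()) return false;
    *idx = it->second;
    return true;
  }

  // Largest index handed out so far, so a frame can size its value table.
  int GetMaxMLValueIdx() const { return static_cast<int>(name_to_idx_.size()) - 1; }

 private:
  std::unordered_map<std::string, int> name_to_idx_;
};

With such a map in place, an execution frame can size its value vector to GetMaxMLValueIdx() + 1 and resolve feeds by index on every run, which is what the updated ExecutionFrame::Init in the diff below does.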
Pranav Sharma authored and Pranav Sharma committed Mar 20, 2018
2 parents 654d9ce + 0f341fd commit 7656e35
Showing 14 changed files with 602 additions and 450 deletions.
196 changes: 99 additions & 97 deletions lotus/core/framework/execution_frame.cc
@@ -4,116 +4,118 @@
 
 namespace Lotus {
 
-ExecutionFrame::ExecutionFrame(const std::unordered_map<std::string, MLValue>& feeds,
-                               const std::vector<std::string>& output_names,
-                               const SessionState& session_state) {
-  Init(session_state.GetGraph(), feeds, output_names, session_state);
-  InitArenas();
-}
+ExecutionFrame::ExecutionFrame(const std::unordered_map<std::string, MLValue>& feeds,
+                               const std::vector<std::string>& output_names,
+                               const SessionState& session_state)
+    : session_state_(session_state) {
+  Init(session_state.GetGraph(), feeds, output_names);
+  InitArenas();
+}
 
-Status ExecutionFrame::AllocateTensorWithSelfOwnBuffer(
-    const int index, const MLDataType element_type,
-    const AllocatorInfo& location, const TensorShape& shape) {
-  LOTUS_ENFORCE(index >= 0 && index < node_values_.size());
-  auto value = node_values_[index];
-  LOTUS_ENFORCE(!value->IsAllocated());
-  IAllocator* alloc = GetArena(location);
-  void* buffer = alloc->Alloc(element_type->Size() * shape.Size());
-  Tensor* tensor = new Tensor(
+Status ExecutionFrame::AllocateTensorWithSelfOwnBuffer(const int index,
+                                                       const MLDataType element_type,
+                                                       const AllocatorInfo& location,
+                                                       const TensorShape& shape) {
+  LOTUS_ENFORCE(index >= 0 && index < node_values_.size());
+  auto value = node_values_[index];
+  LOTUS_ENFORCE(!value->IsAllocated());
+  IAllocator* alloc = GetArena(location);
+  void* buffer = alloc->Alloc(element_type->Size() * shape.Size());
+  Tensor* tensor = new Tensor(
       element_type,
       shape,
       std::move(BufferUniquePtr(buffer, BufferDeleter(alloc))),
       location);
-  value->Init(tensor,
-              DataTypeImpl::GetType<Tensor>(),
-              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
-  return Status::OK();
-}
+  value->Init(tensor,
+              DataTypeImpl::GetType<Tensor>(),
+              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
+  return Status::OK();
+}
 
-Status ExecutionFrame::AllocateTensorWithPreAllocateBuffer(
-    const int offset, void* pBuffer, const MLDataType element_type,
-    const AllocatorInfo& location, const TensorShape& shape) {
-  LOTUS_ENFORCE(offset >= 0 && offset < node_values_.size());
-  auto value = node_values_[offset];
-  LOTUS_ENFORCE(!value->IsAllocated());
+Status ExecutionFrame::AllocateTensorWithPreAllocateBuffer(const int offset,
+                                                           void* pBuffer,
+                                                           const MLDataType element_type,
+                                                           const AllocatorInfo& location,
+                                                           const TensorShape& shape)
+{
+  LOTUS_ENFORCE(offset >= 0 && offset < node_values_.size());
+  auto value = node_values_[offset];
+  LOTUS_ENFORCE(!value->IsAllocated());
 
-  Tensor* tensor = new Tensor(element_type, shape, pBuffer, location);
-  value->Init(tensor,
-              DataTypeImpl::GetType<Tensor>(),
-              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
-  return Status::OK();
-}
+  Tensor* tensor = new Tensor(element_type,
+                              shape,
+                              pBuffer,
+                              location);
+
+  value->Init(tensor,
+              DataTypeImpl::GetType<Tensor>(),
+              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
+  return Status::OK();
+}
 
-void ExecutionFrame::Release(const int offset) {
-  LOTUS_ENFORCE(offset >= 0 && offset < node_offsets_.size());
-  node_values_[offset]->Reset();
-}
+void ExecutionFrame::Release(const int offset)
+{
+  LOTUS_ENFORCE(offset >= 0 && offset < node_offsets_.size());
+  node_values_[offset]->Reset();
+}
 
-void ExecutionFrame::Init(const LotusIR::Graph* graph,
-                          const std::unordered_map<string, MLValue>& feeds,
-                          const std::vector<string>& output_names,
-                          const SessionState& session_state) {
-  LOTUS_ENFORCE(graph);
-  // 1. construct the value name to index map
-  // It seems not efficient to construct this map everytime
-  // If planner could provide this, we can pass in the map.
-  // TODO: avoid const_cast here since this operation can be performed
-  // in the inference session once and avoided each time execution is
-  // called.
-  std::vector<LotusIR::NODEINDEX>* nodes;
-  auto status = const_cast<LotusIR::Graph*>(graph)->GetNodesInTopologicalOrder(&nodes);
-  LOTUS_ENFORCE(status.IsOK());
-  auto num_nodes = nodes->size();
-  node_offsets_.resize(num_nodes);
-  int current = 0;
-  for (int i = 0; i < num_nodes; i++) {
-    auto node = graph->GetNode((*nodes)[i]);
-    auto& inputs = node->InputDefs();
-    for (auto def : inputs) {
-      if (value_name_to_index_.find(def->Name()) ==
-          value_name_to_index_.end())
-        value_name_to_index_[def->Name()] = current++;
-    }
-    auto& outputs = node->OutputDefs();
-    for (auto def : outputs) {
-      if (value_name_to_index_.find(def->Name()) ==
-          value_name_to_index_.end())
-        value_name_to_index_[def->Name()] = current++;
-    }
-  }
-
-  // 2. resize the all_value_ vector
-  auto num_values = value_name_to_index_.size();
-  all_values_.resize(num_values);
-
-  // 3. handle feed in values
-  for (auto it = feeds.begin(); it != feeds.end(); it++) {
-    auto index_it = value_name_to_index_.find(it->first);
-    LOTUS_ENFORCE(index_it != value_name_to_index_.end());
-    // we are sharing the underline tensor/object for MLValue
-    all_values_[index_it->second] = it->second;
-  }
-
-  // 4. Todo: handle the weights.
-  UNUSED_PARAMETER(session_state);
-
-  // 5. set node args
-  for (int i = 0; i < num_nodes; i++) {
-    auto node = graph->GetNode((*nodes)[i]);
-    LOTUS_ENFORCE(node && node->Index() < node_offsets_.size());
-    node_offsets_[node->Index()] = (int)node_values_.size();
-    auto& inputs = node->InputDefs();
-    for (auto def : inputs) {
-      SetupNodeArg(def, value_name_to_index_);
-    }
-    auto& outputs = node->OutputDefs();
-    for (auto def : outputs) {
-      SetupNodeArg(def, value_name_to_index_);
-    }
-  }
-
-  // 6. for outputs, we may limit the buffer strategy, for example,
-  // output tensor should always use its own buffer. TBD
-  UNUSED_PARAMETER(output_names);
-}
+void ExecutionFrame::Init(const LotusIR::Graph* graph,
+                          const std::unordered_map<string, MLValue>& feeds,
+                          const std::vector<string>& output_names)
+{
+  LOTUS_ENFORCE(graph);
+
+  //1. resize the node_offsets and all_value_ vector
+  auto num_nodes = graph->NumberOfNodes();
+  node_offsets_.resize(num_nodes);
+
+  all_values_.resize(session_state_.GetMaxMLValueIdx() + 1);
+
+  //2. handle feed in values
+  for (auto it = feeds.begin(); it != feeds.end(); it++)
+  {
+    int index;
+    Common::Status status = session_state_.GetMLValueIdx(it->first, &index);
+    LOTUS_ENFORCE(status.IsOK());
+    // we are sharing the underline tensor/object for MLValue
+    all_values_[index] = it->second;
+  }
+
+  //3. Todo: handle the weights.
+
+  //4. set node args
+  // TODO const_cast is needed due to the lack of a const iterator in the graph
+  Graph* p_graph = const_cast<Graph*>(graph);
+
+  for (auto node_it = p_graph->Nodes_begin(); node_it != p_graph->Nodes_end(); ++node_it) {
+    auto node = *node_it;
+    LOTUS_ENFORCE(node && node->Index() < node_offsets_.size());
+    node_offsets_[node->Index()] = static_cast<int>(node_values_.size());
+    auto& inputs = node->InputDefs();
+    for (auto def : inputs)
+    {
+      SetupNodeArg(def);
+    }
+    auto& outputs = node->OutputDefs();
+    for (auto def : outputs)
+    {
+      SetupNodeArg(def);
+    }
+  }
+
+  //5. for outputs, we may limit the buffer strategy, for example,
+  // output tensor should always use its own buffer. TBD
+  UNUSED_PARAMETER(output_names);
+}
+
+void ExecutionFrame::SetupNodeArg(LotusIR::NodeArg* arg)
+{
+  LOTUS_ENFORCE(arg);
+  auto& name = arg->Name();
+  int index;
+  Common::Status status = session_state_.GetMLValueIdx(name, &index);
+  LOTUS_ENFORCE(status.IsOK());
+  node_values_.push_back(&all_values_[index]);
+}
 
 }
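
As a side note, the node_offsets_/node_values_/all_values_ trio in the updated Init and SetupNodeArg implies a simple indexing scheme. The sketch below reproduces that scheme with made-up stand-ins (a Value struct in place of MLValue, hard-coded node and value assignments) purely for illustration; it is not code from this commit.

#include <iostream>
#include <string>
#include <vector>

struct Value { std::string data; };  // stand-in for MLValue

int main() {
  // all_values: one slot per distinct value name, indexed by the session-wide id.
  std::vector<Value> all_values(4);

  // node_values: for each node in turn, pointers to the slots of its inputs/outputs;
  // node_offsets[n] records where node n's arguments start in node_values.
  std::vector<Value*> node_values;
  std::vector<int> node_offsets(2);

  // Suppose node 0 touches values 0 and 2, and node 1 touches values 2 and 3.
  node_offsets[0] = static_cast<int>(node_values.size());
  node_values.push_back(&all_values[0]);
  node_values.push_back(&all_values[2]);
  node_offsets[1] = static_cast<int>(node_values.size());
  node_values.push_back(&all_values[2]);
  node_values.push_back(&all_values[3]);

  // A kernel running node 1 reaches its first argument via the offset table
  // and sees whatever node 0 wrote into the shared slot.
  all_values[2].data = "produced by node 0";
  std::cout << node_values[node_offsets[1]]->data << "\n";
  return 0;
}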

0 comments on commit 7656e35
