Merged PR 1072: - Remove construction of MLValue name -> idx mapping from execution

- Remove construction of MLValue name -> idx mapping from execution
  frame to inference session since it needs to be done per session only.
- Minor change in Executor interface (eliminate one heap allocation).

Related work items: #60
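
To make the intent of the change concrete, here is a minimal sketch of a session-level name -> index map that is built once and then only queried at execution time. This is not the actual Lotus SessionState: the class name, the AddOrGetIdx helper, and the bool return value (standing in for Common::Status) are illustrative assumptions, while GetMLValueIdx and GetMaxMLValueIdx mirror the calls visible in the diff below.

#include <string>
#include <unordered_map>

// Sketch only: a per-session registry mapping MLValue names to dense indices.
class SessionStateSketch {
 public:
  // Hypothetical helper, called once per session while walking the graph:
  // returns the existing index for a name, or assigns the next free one.
  int AddOrGetIdx(const std::string& name) {
    const auto it = name_to_idx_.find(name);
    if (it != name_to_idx_.end()) return it->second;
    const int idx = static_cast<int>(name_to_idx_.size());
    name_to_idx_.emplace(name, idx);
    return idx;
  }

  // Lookup used on every Run(); bool stands in for Common::Status here.
  bool GetMLValueIdx(const std::string& name, int* idx) const {
    const auto it = name_to_idx_.find(name);
    if (it == name_to_idx_.end()) return false;
    *idx = it->second;
    return true;
  }

  // Largest index handed out so far, so a frame can size its value table.
  int GetMaxMLValueIdx() const { return static_cast<int>(name_to_idx_.size()) - 1; }

 private:
  std::unordered_map<std::string, int> name_to_idx_;
};

With such a map in place, an execution frame can size its value vector to GetMaxMLValueIdx() + 1 and resolve feeds by index on every run, which is what the updated ExecutionFrame::Init in the diff below does.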
Pranav Sharma authored and Pranav Sharma committed Mar 20, 2018
2 parents 654d9ce + 0f341fd commit 7656e35
Showing 14 changed files with 602 additions and 450 deletions.
196 changes: 99 additions & 97 deletions lotus/core/framework/execution_frame.cc
@@ -4,116 +4,118 @@
 
 namespace Lotus {
 
-ExecutionFrame::ExecutionFrame(const std::unordered_map<std::string, MLValue>& feeds,
-                               const std::vector<std::string>& output_names,
-                               const SessionState& session_state) {
-  Init(session_state.GetGraph(), feeds, output_names, session_state);
-  InitArenas();
-}
+ExecutionFrame::ExecutionFrame(const std::unordered_map<std::string, MLValue>& feeds,
+                               const std::vector<std::string>& output_names,
+                               const SessionState& session_state)
+    : session_state_(session_state) {
+  Init(session_state.GetGraph(), feeds, output_names);
+  InitArenas();
+}
 
-Status ExecutionFrame::AllocateTensorWithSelfOwnBuffer(
-    const int index, const MLDataType element_type,
-    const AllocatorInfo& location, const TensorShape& shape) {
-  LOTUS_ENFORCE(index >= 0 && index < node_values_.size());
-  auto value = node_values_[index];
-  LOTUS_ENFORCE(!value->IsAllocated());
-  IAllocator* alloc = GetArena(location);
-  void* buffer = alloc->Alloc(element_type->Size() * shape.Size());
-  Tensor* tensor = new Tensor(
+Status ExecutionFrame::AllocateTensorWithSelfOwnBuffer(const int index,
+                                                       const MLDataType element_type,
+                                                       const AllocatorInfo& location,
+                                                       const TensorShape& shape) {
+  LOTUS_ENFORCE(index >= 0 && index < node_values_.size());
+  auto value = node_values_[index];
+  LOTUS_ENFORCE(!value->IsAllocated());
+  IAllocator* alloc = GetArena(location);
+  void* buffer = alloc->Alloc(element_type->Size() * shape.Size());
+  Tensor* tensor = new Tensor(
       element_type,
       shape,
       std::move(BufferUniquePtr(buffer, BufferDeleter(alloc))),
       location);
-  value->Init(tensor,
-              DataTypeImpl::GetType<Tensor>(),
-              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
-  return Status::OK();
-}
+  value->Init(tensor,
+              DataTypeImpl::GetType<Tensor>(),
+              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
+  return Status::OK();
+}
 
-Status ExecutionFrame::AllocateTensorWithPreAllocateBuffer(
-    const int offset, void* pBuffer, const MLDataType element_type,
-    const AllocatorInfo& location, const TensorShape& shape) {
-  LOTUS_ENFORCE(offset >= 0 && offset < node_values_.size());
-  auto value = node_values_[offset];
-  LOTUS_ENFORCE(!value->IsAllocated());
+Status ExecutionFrame::AllocateTensorWithPreAllocateBuffer(const int offset,
+                                                           void* pBuffer,
+                                                           const MLDataType element_type,
+                                                           const AllocatorInfo& location,
+                                                           const TensorShape& shape)
+{
+  LOTUS_ENFORCE(offset >= 0 && offset < node_values_.size());
+  auto value = node_values_[offset];
+  LOTUS_ENFORCE(!value->IsAllocated());
 
-  Tensor* tensor = new Tensor(element_type, shape, pBuffer, location);
-  value->Init(tensor,
-              DataTypeImpl::GetType<Tensor>(),
-              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
-  return Status::OK();
-}
+  Tensor* tensor = new Tensor(element_type,
+                              shape,
+                              pBuffer,
+                              location);
+
+  value->Init(tensor,
+              DataTypeImpl::GetType<Tensor>(),
+              DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
+  return Status::OK();
+}
 
-void ExecutionFrame::Release(const int offset) {
-  LOTUS_ENFORCE(offset >= 0 && offset < node_offsets_.size());
-  node_values_[offset]->Reset();
-}
+void ExecutionFrame::Release(const int offset)
+{
+  LOTUS_ENFORCE(offset >= 0 && offset < node_offsets_.size());
+  node_values_[offset]->Reset();
+}
 
-void ExecutionFrame::Init(const LotusIR::Graph* graph,
-                          const std::unordered_map<string, MLValue>& feeds,
-                          const std::vector<string>& output_names,
-                          const SessionState& session_state) {
-  LOTUS_ENFORCE(graph);
-  // 1. construct the value name to index map
-  // It seems not efficient to construct this map everytime
-  // If planner could provide this, we can pass in the map.
-  // TODO: avoid const_cast here since this operation can be performed
-  // in the inference session once and avoided each time execution is
-  // called.
-  std::vector<LotusIR::NODEINDEX>* nodes;
-  auto status = const_cast<LotusIR::Graph*>(graph)->GetNodesInTopologicalOrder(&nodes);
-  LOTUS_ENFORCE(status.IsOK());
-  auto num_nodes = nodes->size();
-  node_offsets_.resize(num_nodes);
-  int current = 0;
-  for (int i = 0; i < num_nodes; i++) {
-    auto node = graph->GetNode((*nodes)[i]);
-    auto& inputs = node->InputDefs();
-    for (auto def : inputs) {
-      if (value_name_to_index_.find(def->Name()) ==
-          value_name_to_index_.end())
-        value_name_to_index_[def->Name()] = current++;
-    }
-    auto& outputs = node->OutputDefs();
-    for (auto def : outputs) {
-      if (value_name_to_index_.find(def->Name()) ==
-          value_name_to_index_.end())
-        value_name_to_index_[def->Name()] = current++;
-    }
-  }
-
-  // 2. resize the all_value_ vector
-  auto num_values = value_name_to_index_.size();
-  all_values_.resize(num_values);
-
-  // 3. handle feed in values
-  for (auto it = feeds.begin(); it != feeds.end(); it++) {
-    auto index_it = value_name_to_index_.find(it->first);
-    LOTUS_ENFORCE(index_it != value_name_to_index_.end());
-    // we are sharing the underline tensor/object for MLValue
-    all_values_[index_it->second] = it->second;
-  }
-
-  // 4. Todo: handle the weights.
-  UNUSED_PARAMETER(session_state);
-
-  // 5. set node args
-  for (int i = 0; i < num_nodes; i++) {
-    auto node = graph->GetNode((*nodes)[i]);
-    LOTUS_ENFORCE(node && node->Index() < node_offsets_.size());
-    node_offsets_[node->Index()] = (int)node_values_.size();
-    auto& inputs = node->InputDefs();
-    for (auto def : inputs) {
-      SetupNodeArg(def, value_name_to_index_);
-    }
-    auto& outputs = node->OutputDefs();
-    for (auto def : outputs) {
-      SetupNodeArg(def, value_name_to_index_);
-    }
-  }
-
-  // 6. for outputs, we may limit the buffer strategy, for example,
-  // output tensor should always use its own buffer. TBD
-  UNUSED_PARAMETER(output_names);
-}
+void ExecutionFrame::Init(const LotusIR::Graph* graph,
+                          const std::unordered_map<string, MLValue>& feeds,
+                          const std::vector<string>& output_names)
+{
+  LOTUS_ENFORCE(graph);
+
+  //1. resize the node_offsets and all_value_ vector
+  auto num_nodes = graph->NumberOfNodes();
+  node_offsets_.resize(num_nodes);
+
+  all_values_.resize(session_state_.GetMaxMLValueIdx() + 1);
+
+  //2. handle feed in values
+  for (auto it = feeds.begin(); it != feeds.end(); it++)
+  {
+    int index;
+    Common::Status status = session_state_.GetMLValueIdx(it->first, &index);
+    LOTUS_ENFORCE(status.IsOK());
+    // we are sharing the underline tensor/object for MLValue
+    all_values_[index] = it->second;
+  }
+
+  //3. Todo: handle the weights.
+
+  //4. set node args
+  // TODO const_cast is needed due to the lack of a const iterator in the graph
+  Graph* p_graph = const_cast<Graph*>(graph);
+
+  for (auto node_it = p_graph->Nodes_begin(); node_it != p_graph->Nodes_end(); ++node_it) {
+    auto node = *node_it;
+    LOTUS_ENFORCE(node && node->Index() < node_offsets_.size());
+    node_offsets_[node->Index()] = static_cast<int>(node_values_.size());
+    auto& inputs = node->InputDefs();
+    for (auto def : inputs)
+    {
+      SetupNodeArg(def);
+    }
+    auto& outputs = node->OutputDefs();
+    for (auto def : outputs)
+    {
+      SetupNodeArg(def);
+    }
+  }
+
+  //5. for outputs, we may limit the buffer strategy, for example,
+  // output tensor should always use its own buffer. TBD
+  UNUSED_PARAMETER(output_names);
+}
+
+void ExecutionFrame::SetupNodeArg(LotusIR::NodeArg* arg)
+{
+  LOTUS_ENFORCE(arg);
+  auto& name = arg->Name();
+  int index;
+  Common::Status status = session_state_.GetMLValueIdx(name, &index);
+  LOTUS_ENFORCE(status.IsOK());
+  node_values_.push_back(&all_values_[index]);
+}
 
 }
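
As a side note, the node_offsets_/node_values_/all_values_ trio in the updated Init and SetupNodeArg implies a simple indexing scheme. The sketch below reproduces that scheme with made-up stand-ins (a Value struct in place of MLValue, hard-coded node and value assignments) purely for illustration; it is not code from this commit.

#include <iostream>
#include <string>
#include <vector>

struct Value { std::string data; };  // stand-in for MLValue

int main() {
  // all_values: one slot per distinct value name, indexed by the session-wide id.
  std::vector<Value> all_values(4);

  // node_values: for each node in turn, pointers to the slots of its inputs/outputs;
  // node_offsets[n] records where node n's arguments start in node_values.
  std::vector<Value*> node_values;
  std::vector<int> node_offsets(2);

  // Suppose node 0 touches values 0 and 2, and node 1 touches values 2 and 3.
  node_offsets[0] = static_cast<int>(node_values.size());
  node_values.push_back(&all_values[0]);
  node_values.push_back(&all_values[2]);
  node_offsets[1] = static_cast<int>(node_values.size());
  node_values.push_back(&all_values[2]);
  node_values.push_back(&all_values[3]);

  // A kernel running node 1 reaches its first argument via the offset table
  // and sees whatever node 0 wrote into the shared slot.
  all_values[2].data = "produced by node 0";
  std::cout << node_values[node_offsets[1]]->data << "\n";
  return 0;
}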

0 comments on commit 7656e35
