#include "local-execution/local_cost_estimator.h"
#include "kernels/device.h"
#include "kernels/local_cuda_allocator.h"
#include "local-execution/tracked_allocator.h"
#include "op-attrs/computation_graph_op_attrs.h"
#include "op-attrs/pcg_operator_attrs.h"
#include "pcg/computation_graph_builder.h"
#include "pcg/parallel_tensor_attrs.h"
#include "utils/containers/transform.h"

namespace FlexFlow {
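
// Sum the recorded per-layer forward and backward elapsed times into a single
// total.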
static float get_total_elapsed_time(PerLayerElapsedTime const &fwd,
                                    PerLayerElapsedTime const &bwd) {
  float total_elapsed_time = 0;
  for (auto const &layer_elapsed_time : fwd) {
    layer_guid_t layer_id = layer_elapsed_time.first;
    float fwd_time = layer_elapsed_time.second.value();
    float bwd_time = bwd.at(layer_id).value();
    total_elapsed_time += fwd_time + bwd_time;
  }
  return total_elapsed_time;
}

LocalCostEstimator::LocalCostEstimator(RuntimeArgConfig const &config)
    : runtime_arg_config(config) {}

CostDetails LocalCostEstimator::estimate_cost(
    PCGOperatorAttrs const &op,
    std::vector<ParallelTensorShape> const &inputs,
    std::vector<ParallelTensorAttrs> const &weights,
    std::vector<ParallelTensorAttrs> const &outputs,
    MachineView const &mv) const {
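  // Parallel ops and input/noop/weight layers are treated as zero-cost.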
  if (is_parallel_op(op) || op.has<InputAttrs>() || op.has<NoopAttrs>() ||
      op.has<WeightAttrs>()) {
    return CostDetails{0, 0};
  }
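
  // Lower the PCG operator attributes to computation-graph operator attributes
  // so the operator can be added to a single-device computation graph.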
  LayerAttrs layer_attrs =
      LayerAttrs{compgraph_op_attrs_from_pcg_op_attrs(op), std::nullopt};

  // Allocate backing memory for the inputs through a tracked allocator so that
  // memory usage can be measured alongside the elapsed time.
  std::shared_ptr<TrackedAllocator> tracked_allocator_ptr =
      std::make_shared<TrackedAllocator>(create_local_cuda_memory_allocator());
  Allocator allocator = Allocator(tracked_allocator_ptr);
  TensorBackingMap tensor_backing_map;
  std::vector<tensor_guid_t> input_tensor_ids;
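
  // Create a single-device tensor for each input's piece shape and back it
  // with memory from the tracked allocator.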
  ComputationGraphBuilder cg_builder;
  for (ParallelTensorShape const &input : inputs) {
    TensorShape tensor_shape = get_piece_shape(input);
    tensor_guid_t tensor_id =
        cg_builder.create_tensor(tensor_shape, CreateGrad::YES);
    GenericTensorAccessorW tensor_backing =
        allocator.allocate_tensor(tensor_shape);
    tensor_backing_map.insert({tensor_id, tensor_backing});
    input_tensor_ids.push_back(tensor_id);
  }
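
  // Map each ParallelTensorAttrs to the attrs of a single piece so that
  // weights and outputs can be added to the single-device computation graph.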
  auto get_vector_piece_attrs =
      [](std::vector<ParallelTensorAttrs> const &parallel_attrs) {
        return transform(parallel_attrs, [](ParallelTensorAttrs const &p) {
          return get_piece_attrs(p);
        });
      };

  // add operator to graph
  std::vector<tensor_guid_t> output_tensor_ids =
      cg_builder.add_layer(layer_attrs,
                           input_tensor_ids,
                           get_vector_piece_attrs(weights),
                           get_vector_piece_attrs(outputs));
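
  // Run init, forward, and backward through the local training backing and
  // collect the per-layer elapsed times.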
  std::optional<ModelTrainingInstance> model_training_instance = std::nullopt;
  LocalTrainingBacking local_backing(allocator,
                                     cg_builder.computation_graph,
                                     tensor_backing_map,
                                     this->runtime_arg_config,
                                     model_training_instance);
  local_backing.execute_init();
  PerLayerElapsedTime fwd = local_backing.execute_forward();
  PerLayerElapsedTime bwd = local_backing.execute_backward();
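
  // Report the summed forward+backward time together with the memory currently
  // held by the tracked allocator.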
  return CostDetails{get_total_elapsed_time(fwd, bwd),
                     tracked_allocator_ptr->get_current_mem_usage()};
}

float LocalCostEstimator::estimate_cost(ParallelTensorShape const &tensor_shape,
                                        MachineView const &src,
                                        MachineView const &dst) const {
  // TODO: model communication cost analytically
  // https://github.com/flexflow/FlexFlow/issues/1414
  // temporarily return 0
  return 0.0;
}

CostEstimator
    get_local_cost_estimator(RuntimeArgConfig const &runtime_arg_config) {
  return CostEstimator::create<LocalCostEstimator>(runtime_arg_config);
}
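
// Illustrative usage sketch (an assumption, not part of this file: it presumes
// the CostEstimator wrapper forwards estimate_cost with the same signature as
// LocalCostEstimator):
//
//   CostEstimator estimator = get_local_cost_estimator(runtime_arg_config);
//   CostDetails op_cost = estimator.estimate_cost(
//       op_attrs, input_shapes, weight_attrs, output_attrs, machine_view);
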
} // namespace FlexFlow