-
Notifications
You must be signed in to change notification settings - Fork 236
/
Copy pathtest_local_cost_estimator.cc
77 lines (66 loc) · 2.77 KB
/
test_local_cost_estimator.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include "doctest/doctest.h"
#include "kernels/local_cuda_allocator.h"
#include "kernels/managed_per_device_ff_handle.h"
#include "local-execution/local_cost_estimator.h"
#include "op-attrs/ops/attention.h"
#include "op-attrs/parallel_tensor_shape.h"
#include "pcg/computation_graph_builder.h"
#include "test_utils.h"
using namespace ::FlexFlow;
TEST_SUITE(FF_CUDA_TEST_SUITE) {
  // Smoke test for LocalCostEstimator: builds a multi-head attention operator,
  // asks the estimator for its cost, and checks that both the reported elapsed
  // time and the reported memory usage are strictly positive.
  TEST_CASE("Local Cost Estimator") {
    // local backing initialization
    ManagedPerDeviceFFHandle managed_handle{};

    // Profiling is enabled with no warmup iterations and a single measured
    // iteration.
    RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{
        DeviceSpecific<PerDeviceFFHandle>::create(managed_handle.raw_handle()),
        EnableProfiling::YES,
        ProfilingSettings{/*warmup_iters=*/0,
                          /*measure_iters=*/1}};

    LocalCostEstimator cost_estimator = LocalCostEstimator{runtime_arg_config};

    SUBCASE("Estimate cost -- Attention Op") {
      int embed_dim = 32;
      int num_heads = 10;
      MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{
          /*embed_dim=*/embed_dim,
          /*num_heads=*/num_heads,
          /*kdim=*/embed_dim,
          /*vdim=*/embed_dim,
          /*dropout=*/0.0,
          /*bias=*/false,
          /*add_bias_kv=*/false,
          /*add_zero_attn=*/false,
      };

      size_t batch_size = 40;
      size_t seq_len = 48;
      size_t feature_size = 36;
      DataType dtype = DataType::FLOAT;

      // The same shape is passed for all three operator inputs below. The
      // element type comes from `dtype` so that the local above is the single
      // source of truth rather than an unused variable.
      ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{
          TensorDims{FFOrdered<size_t>{batch_size, seq_len, feature_size}},
          dtype,
      });

      // Weight and output shapes are derived from the attention attributes
      // and the input shapes; throw_if_unexpected unwraps the result.
      // NOTE(review): presumably it throws when shape inference fails --
      // confirm against its definition.
      ParallelTensorShape weights_shape = throw_if_unexpected(
          get_weights_shape(attrs, inputs_shape, inputs_shape, inputs_shape));
      ParallelTensorAttrs weight_attrs =
          ParallelTensorAttrs{weights_shape,
                              /*sync_type=*/std::nullopt,
                              /*initializer=*/std::nullopt,
                              CreateGrad::YES};

      ParallelTensorShape output_shape = throw_if_unexpected(
          get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape));
      ParallelTensorAttrs output_attrs =
          ParallelTensorAttrs{output_shape,
                              /*sync_type=*/std::nullopt,
                              /*initializer=*/std::nullopt,
                              CreateGrad::YES};

      // NOTE(review): the machine view is built from gpu ids 0 and 1; the
      // exact meaning of the two arguments (start/stop vs. start/count) is not
      // visible here -- confirm against make_1d_machine_view.
      CostDetails result = cost_estimator.estimate_cost(
          PCGOperatorAttrs{attrs},
          std::vector<ParallelTensorShape>{
              inputs_shape, inputs_shape, inputs_shape},
          std::vector<ParallelTensorAttrs>{weight_attrs},
          std::vector<ParallelTensorAttrs>{output_attrs},
          make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1}));

      // Only sanity-check the results: exact values depend on the hardware.
      CHECK(result.total_elapsed_time > 0);
      CHECK(result.total_mem_usage > 0);
    }
  }
}