decisiontrees.proto

syntax = "proto2";

package protobufs;
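
// Loss functions available for training: logistic loss (LOGIT),
// least absolute deviation, and Huber loss.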
enum LossFunction {
  LOGIT = 1;
  LEAST_ABSOLUTE_DEVIATION = 2;
  HUBER = 3;
}
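
// How per-tree outputs are rescaled when a forest is evaluated
// (e.g. averaging for random forests, log-odds for logit boosting).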
enum Rescaling {
  NONE = 1;
  AVERAGING = 2;
  LOG_ODDS = 3;
}
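
// A single (feature index, value) pair.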
message Feature {
  optional int64 feature = 1;
  optional double value = 2;
}
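
// A single example: its label, an optional weighted label, and a
// dense feature vector.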
message Example {
  optional double label = 1;
  optional double weightedLabel = 2;
  repeated double features = 3 [packed=true];
}
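
// Training and held-out test examples.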
message TrainingData {
  repeated Example train = 1;
  repeated Example test = 2;
}
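
// A node in a decision tree. Internal nodes carry a feature, a split
// value, and left/right children; leaves carry leafValue.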
message TreeNode {
  // feature to split on
  optional int64 feature = 1;
  // value to split on
  optional double splitValue = 2;
  optional TreeNode left = 3;
  optional TreeNode right = 4;
  optional double leafValue = 5;
  optional Annotation annotation = 6;
}
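
// Statistics attached to a node during training.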
message Annotation {
  optional int64 numExamples = 1;
  optional double averageGain = 2;
  // Proportion of examples on the left branch.
  // Used to annotate branch probabilities in compiled tree models
  optional double leftFraction = 3;
}
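
// An ensemble of decision trees plus the rescaling to apply to their
// combined output.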
message Forest {
  repeated TreeNode trees = 1;
  optional Rescaling rescaling = 2 [default=NONE];
}
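
// Stopping criteria for growing a tree: maximum depth, minimum average
// gain for a split, and minimum number of samples at a leaf.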
message SplittingConstraints {
  optional int64 maximumLevels = 1;
  optional double minimumAverageGain = 2;
  optional int64 minimumSamplesAtLeaf = 3;
}
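
// Cross-validation settings used when pruning trees.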
message PruningConstraints {
  optional int64 crossValidationFolds = 1;
}
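
// Influence trimming for boosting: after warmupRounds, low-influence
// examples (controlled by alpha) can presumably be skipped to speed up training.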
message InfluenceTrimmingConfig {
  optional double alpha = 1;
  optional int64 warmupRounds = 2;
}
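
// Loss function to optimize; huberAlpha applies only to the Huber loss.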
message LossFunctionConfig {
  optional LossFunction lossFunction = 1;
  optional double huberAlpha = 2;
}
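
// Shrinkage (learning rate) applied to each weak learner in boosting.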
message ShrinkageConfig {
  optional double shrinkage = 1;
}
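
// Sources of randomness used during training.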
message StochasticityConfig {
  // Take a random sample of the training data each round.
  // Used in stochastic gradient boosting.
  optional double perRoundSamplingRate = 1;
  // Proportion of examples to draw the bootstrap sample from.
  // Used in random forests.
  optional double exampleBoostrapProportion = 2;
  // Number of features to examine at each splitting step.
  // Used in random forests.
  optional int64 featureSampleSize = 3;
}
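
// The ensemble algorithm to run.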
enum Algorithm {
  BOOSTING = 1;
  RANDOM_FOREST = 2;
}
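
// Full configuration for training a forest.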
message ForestConfig {
  optional int64 numWeakLearners = 1;
  optional SplittingConstraints splittingConstraints = 2;
  optional LossFunctionConfig lossFunctionConfig = 3;
  optional InfluenceTrimmingConfig influenceTrimmingConfig = 4;
  optional ShrinkageConfig shrinkageConfig = 5;
  optional StochasticityConfig stochasticityConfig = 6;
  optional Algorithm algorithm = 7;
}
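
// Lifecycle of a training task.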
enum TrainingStatus {
  UNCLAIMED = 1;
  PROCESSING = 2;
  FINISHED = 3;
}
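
// Location of a file stored in MongoDB GridFS.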
message GridFsConfig {
  optional string database = 1;
  optional string collection = 2 [default="fs"];
  optional string file = 3;
}
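
// Supported backends for loading training data.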
enum DataSource {
  GRIDFS = 1;
}
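
// Which data source to read from, plus its backend-specific settings.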
message DataSourceConfig {
  optional DataSource dataSource = 1;
  optional GridFsConfig gridFsConfig = 2;
}
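
// Evaluation metrics recorded for a single training round.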
message EpochResult {
  optional double roc = 1;
  optional double logScore = 2;
  optional double normalizedEntropy = 3;
  optional double calibration = 4;
}
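
// Per-round evaluation metrics for a training run.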
message TrainingResults {
  repeated EpochResult epochResults = 1;
}
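
// A single training task: its configuration, data source, current
// status, and, when available, the trained forest and evaluation results.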
message TrainingRow {
  optional ForestConfig forestConfig = 1;
  optional Forest forest = 2;
  optional DataSourceConfig dataSourceConfig = 3;
  optional TrainingStatus trainingStatus = 4;
  optional TrainingResults trainingResults = 5;
}