Skip to content

Commit

Permalink
[REVIEW] support xgboost multi-class models in C/C++ layer in FIL (#2866)
Browse files Browse the repository at this point in the history

* make num_classes significant in FLOAT_SCALAR case

* changelog; check correspondence between output_t::CLASS and num_classes

* misc copy-paste bugs/style

* Rename:

leaf_value_t into leaf_algo_t
FLOAT_SCALAR into FLOAT_SAME_CLASS
INT_CLASS_LABEL into CATEGORICAL_LEAF

* changelog

* leaf_payload_type* -> leaf_algo*

* comments

* addressed review comments

* changelog

* remove extra changes

* added tests

* remove unnecessary changes

* reorder switch(leaf_algo_t) and if(predict_proba)

It does not increase the size of the change, but makes the logic clearer
and will extend better to new leaf_algo_t

* fix leaf conversion for TREE_PER_CLASS

* review comments except max/argmax

* new argmax, not explicitly vectorized

* re-resolve merge conflict

* less code

* ...that works

* comments stash

* rename TREE_PER_CLASS -> GROVE_PER_CLASS

* shorten a test

* actually test GROVE_PER_CLASS

* addressed review; read standard a bit more carefully

* syncthreads comment

* correct error message

* review comments
  • Loading branch information
levsnv authored Oct 5, 2020
1 parent 207129b commit d7e1c46
Show file tree
Hide file tree
Showing 5 changed files with 404 additions and 65 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- PR #2789: Dask LabelEncoder
- PR #2152: add FIL C++ benchmark
- PR #2638: Improve cython build with custom `build_ext`
- PR #2866: Support XGBoost-style multiclass models (gradient boosted decision trees) in FIL C++
- PR #2874: Issue warning for degraded accuracy with float64 models in Treelite

## Improvements
Expand Down
38 changes: 34 additions & 4 deletions cpp/include/cuml/fil/fil.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,34 @@ struct sparse_node8_t : dense_node_t {
sparse_node8_t(dense_node_t dn) : dense_node_t(dn) {}
};

/** leaf_algo_t describes what the leaves in a FIL forest store (predict)
    and how FIL aggregates them into class margins/regression result/best class
**/
enum leaf_algo_t {
  /** storing a class probability or regression summand. We add all margins
      together and determine regression result or use threshold to determine
      one of the two classes. **/
  FLOAT_UNARY_BINARY = 0,
  /** storing a class label. Trees vote on the resulting class.
      Probabilities are just normalized votes. */
  CATEGORICAL_LEAF = 1,
  /** 1-vs-rest, or tree-per-class, where trees are assigned round-robin to
      consecutive categories and predict a floating-point margin. Used in
      Gradient Boosted Decision Trees. We sum margins for each group
      separately. **/
  GROVE_PER_CLASS = 2,
  /** 1-vs-rest, or tree-per-class, where trees are assigned round-robin to
      consecutive categories and predict a floating-point margin. Used in
      Gradient Boosted Decision Trees. We sum margins for each group
      separately. This is a more specific version of GROVE_PER_CLASS.
      _FEW_CLASSES means fewer (or as many) classes than threads. **/
  GROVE_PER_CLASS_FEW_CLASSES = 3,
  /** 1-vs-rest, or tree-per-class, where trees are assigned round-robin to
      consecutive categories and predict a floating-point margin. Used in
      Gradient Boosted Decision Trees. We sum margins for each group
      separately. This is a more specific version of GROVE_PER_CLASS.
      _MANY_CLASSES means more classes than threads. **/
  GROVE_PER_CLASS_MANY_CLASSES = 4,
  // to be extended
};

Expand All @@ -145,6 +167,14 @@ template <>
struct leaf_output_t<leaf_algo_t::CATEGORICAL_LEAF> {
typedef int T;
};
/// GROVE_PER_CLASS_FEW_CLASSES leaves predict floating-point margins,
/// so per-class accumulation is done in float.
template <>
struct leaf_output_t<leaf_algo_t::GROVE_PER_CLASS_FEW_CLASSES> {
  using T = float;
};
/// GROVE_PER_CLASS_MANY_CLASSES leaves predict floating-point margins,
/// so per-class accumulation is done in float.
template <>
struct leaf_output_t<leaf_algo_t::GROVE_PER_CLASS_MANY_CLASSES> {
  using T = float;
};

/** node_init initializes node from parameters */
void node_init(dense_node_t* n, val_t output, float thresh, int fid,
Expand Down
103 changes: 74 additions & 29 deletions cpp/src/fil/fil.cu
Original file line number Diff line number Diff line change
Expand Up @@ -188,34 +188,51 @@ struct forest {
The multi-class classification / regression (CATEGORICAL_LEAF) predict() works as follows
(always 1 output):
RAW (no values set): output the label of the class with highest probability, else output label 0.
AVG is set: ignored
SIGMOID is set: ignored
CLASS is set: ignored
All other flags (AVG, SIGMOID, CLASS) are ignored
The multi-class classification / regression (GROVE_PER_CLASS) predict_proba() is not implemented
The multi-class classification / regression (GROVE_PER_CLASS) predict() works as follows
(always 1 output):
RAW (no values set): output the label of the class with highest margin,
equal margins resolved in favor of smaller label integer
All other flags (AVG, SIGMOID, CLASS) are ignored
*/
output_t ot = output_;
bool complement_proba = false, do_transform = global_bias_ != 0.0f;

if (leaf_algo_ == leaf_algo_t::FLOAT_UNARY_BINARY) {
if (predict_proba) {
params.num_outputs = 2;
ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities
complement_proba = true;
do_transform = true;
} else {
params.num_outputs = 1;
if (ot != output_t::RAW) do_transform = true;
bool complement_proba = false, do_transform;

if (predict_proba) {
// no threshold on probabilities
ot = output_t(ot & ~output_t::CLASS);

switch (leaf_algo_) {
case leaf_algo_t::FLOAT_UNARY_BINARY:
params.num_outputs = 2;
complement_proba = true;
do_transform = true;
break;
case leaf_algo_t::GROVE_PER_CLASS:
// TODO(levsnv): add softmax to implement predict_proba
ASSERT(
false,
"predict_proba not supported for multi-class gradient boosted "
"decision trees (encountered in xgboost, scikit-learn, lightgbm)");
case leaf_algo_t::CATEGORICAL_LEAF:
params.num_outputs = num_classes_;
do_transform = ot != output_t::RAW || global_bias_ != 0.0f;
break;
default:
ASSERT(false, "internal error: invalid leaf_algo_");
}
} else if (leaf_algo_ == leaf_algo_t::CATEGORICAL_LEAF) {
if (predict_proba) {
params.num_outputs = num_classes_;
ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities
if (ot != output_t::RAW) do_transform = true;
} else {
if (leaf_algo_ == leaf_algo_t::FLOAT_UNARY_BINARY) {
do_transform = ot != output_t::RAW || global_bias_ != 0.0f;
} else {
params.num_outputs = 1;
// moot since choosing best class and all transforms are monotonic
// also, would break current code
// GROVE_PER_CLASS, CATEGORICAL_LEAF: moot since choosing best class and
// all transforms are monotonic. also, would break current code
do_transform = false;
}
params.num_outputs = 1;
}

// Predict using the forest.
Expand Down Expand Up @@ -403,14 +420,21 @@ void check_params(const forest_params_t* params, bool dense) {
"regression");
}
break;
case leaf_algo_t::GROVE_PER_CLASS:
ASSERT(params->num_classes > 2,
"num_classes > 2 is required for leaf_algo == GROVE_PER_CLASS");
ASSERT(params->num_trees % params->num_classes == 0,
"num_classes must divide num_trees evenly for GROVE_PER_CLASS");
break;
case leaf_algo_t::CATEGORICAL_LEAF:
ASSERT(params->num_classes >= 2,
"num_classes >= 2 is required for "
"leaf_algo == CATEGORICAL_LEAF");
break;
default:
ASSERT(false,
"leaf_algo should be FLOAT_UNARY_BINARY or CATEGORICAL_LEAF");
"leaf_algo must be FLOAT_UNARY_BINARY, CATEGORICAL_LEAF"
" or GROVE_PER_CLASS");
}
// output_t::RAW == 0, and doesn't have a separate flag
output_t all_set =
Expand Down Expand Up @@ -527,6 +551,7 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree& tl_tree,
fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size());
break;
case leaf_algo_t::FLOAT_UNARY_BINARY:
case leaf_algo_t::GROVE_PER_CLASS:
fil_node->val.f = tl_tree.LeafValue(tl_node_id);
ASSERT(!tl_tree.HasLeafVector(tl_node_id),
"some but not all treelite leaves have leaf_vector()");
Expand Down Expand Up @@ -659,19 +684,39 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model,
"are supported for multi-class models");

} else {
params->num_classes = tl_params->output_class ? 2 : 1;
ASSERT(pred_transform == "sigmoid" || pred_transform == "identity",
"only sigmoid and identity values of pred_transform "
"are supported for binary classification and regression models");
params->leaf_algo = leaf_algo_t::FLOAT_UNARY_BINARY;
if (model.num_output_group > 1) {
params->num_classes = model.num_output_group;
ASSERT(tl_params->output_class,
"output_class==true is required for multi-class models");
ASSERT(pred_transform == "sigmoid" || pred_transform == "identity" ||
pred_transform == "max_index" ||
pred_transform == "multiclass_ova",
"only sigmoid, identity, max_index and multiclass_ova values of "
"pred_transform are supported for xgboost-style multi-class "
"classification models.");
// this function should not know how many threads per block will be used
params->leaf_algo = leaf_algo_t::GROVE_PER_CLASS;
} else {
params->num_classes = tl_params->output_class ? 2 : 1;
ASSERT(pred_transform == "sigmoid" || pred_transform == "identity",
"only sigmoid and identity values of pred_transform "
"are supported for binary classification and regression models.");
params->leaf_algo = leaf_algo_t::FLOAT_UNARY_BINARY;
}
}

params->num_cols = model.num_feature;

ASSERT(param.sigmoid_alpha == 1.0f, "sigmoid_alpha not supported");
params->global_bias = param.global_bias;
params->output = output_t::RAW;
if (tl_params->output_class) {
/** output_t::CLASS denotes using a threshold in FIL, when
predict_proba == false. For all multiclass models, the best class is
selected using argmax instead. This happens when either
leaf_algo == CATEGORICAL_LEAF or num_classes > 2.
**/
if (tl_params->output_class && params->leaf_algo != CATEGORICAL_LEAF &&
params->num_classes <= 2) {
params->output = output_t(params->output | output_t::CLASS);
}
// "random forest" in treelite means tree output averaging
Expand Down
Loading

0 comments on commit d7e1c46

Please sign in to comment.