Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-23.06' into branch-23.08
Browse files Browse the repository at this point in the history
  • Loading branch information
ajschmidt8 committed Jun 2, 2023
2 parents d929631 + 20fcb7e commit c2d139a
Show file tree
Hide file tree
Showing 62 changed files with 3,977 additions and 968 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,5 @@ jobs:
# On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm).
test-before-arm64: "pip install cmake && pip install git+https://github.com/dask/[email protected] git+https://github.com/dask/[email protected] git+https://github.com/rapidsai/[email protected]"
# parallelization is based on current test memory usage
test-unittest: "pytest ./python/cuml/tests -k 'not test_sparse_pca_inputs' -n 8 --ignore=python/cuml/tests/dask && pytest ./python/cuml/tests -k 'test_sparse_pca_inputs' && pytest ./python/cuml/tests/dask"
test-unittest: "python -m pytest ./python/cuml/tests -k 'not test_sparse_pca_inputs' -n 8 --ignore=python/cuml/tests/dask && python -m pytest ./python/cuml/tests -k 'test_sparse_pca_inputs' && python -m pytest ./python/cuml/tests/dask"
test-smoketest: "python ci/wheel_smoke_test_cuml.py"
1 change: 0 additions & 1 deletion ci/release/apply_wheel_modifications.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/pyproject.toml
if [[ $CUDA_SUFFIX == "-cu12" ]]; then
sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" python/pyproject.toml
sed -i "s/cupy-cuda11x/cupy-cuda12x/g" python/pyproject.toml
sed -i "s/numba[<=>\.,0-9]*/numba>=0.57/g" python/pyproject.toml
fi
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ dependencies:
- nbsphinx
- ninja
- nltk
- numba>=0.56.4,<0.57
- numba>=0.57
- numpydoc
- pip
- pydata-sphinx-theme
Expand Down
1 change: 0 additions & 1 deletion conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ requirements:
- python x.x
- raft-dask ={{ minor_version }}
- treelite {{ treelite_version }}
- seaborn

tests:
requirements:
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/sg/svc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ std::vector<SvcParams<D>> getInputs()

// SvmParameter{C, cache_size, max_iter, nochange_steps, tol, verbosity})
p.svm_param = ML::SVM::SvmParameter{1, 200, 100, 100, 1e-3, CUML_LEVEL_INFO, 0, ML::SVM::C_SVC};
p.model = ML::SVM::SvmModel<D>{0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr};
p.model = ML::SVM::SvmModel<D>{0, 0, 0, nullptr, {}, nullptr, 0, nullptr};

std::vector<Triplets> rowcols = {{50000, 2, 2}, {2048, 100000, 2}, {50000, 1000, 2}};

Expand Down
12 changes: 12 additions & 0 deletions cpp/include/cuml/experimental/fil/decision_forest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ struct decision_forest {
decision_forest()
: nodes_{},
root_node_indexes_{},
node_id_mapping_{},
vector_output_{},
categorical_storage_{},
num_features_{},
Expand All @@ -125,6 +126,8 @@ struct decision_forest {
* @param nodes A buffer containing all nodes within the forest
* @param root_node_indexes A buffer containing the index of the root node
* of every tree in the forest
* @param node_id_mapping Mapping to use to convert FIL's internal node ID into Treelite's node
* ID. Only relevant when predict_type == infer_kind::leaf_id
* @param num_features The number of features per input sample for this model
* @param num_outputs The number of outputs per row from this model
* @param has_categorical_nodes Whether this forest contains any
Expand Down Expand Up @@ -155,6 +158,7 @@ struct decision_forest {
*/
decision_forest(raft_proto::buffer<node_type>&& nodes,
raft_proto::buffer<index_type>&& root_node_indexes,
raft_proto::buffer<index_type>&& node_id_mapping,
index_type num_features,
index_type num_outputs = index_type{2},
bool has_categorical_nodes = false,
Expand All @@ -169,6 +173,7 @@ struct decision_forest {
io_type postproc_constant = io_type{1})
: nodes_{nodes},
root_node_indexes_{root_node_indexes},
node_id_mapping_{node_id_mapping},
vector_output_{vector_output},
categorical_storage_{categorical_storage},
num_features_{num_features},
Expand Down Expand Up @@ -207,6 +212,8 @@ struct decision_forest {
if (inference_kind == infer_kind::per_tree) {
result = num_trees();
if (has_vector_leaves()) { result *= num_outputs_; }
} else if (inference_kind == infer_kind::leaf_id) {
result = num_trees();
}
return result;
}
Expand All @@ -233,6 +240,8 @@ struct decision_forest {
* @param[in] predict_type Type of inference to perform. Defaults to summing
* the outputs of all trees and produce an output per row. If set to
* "per_tree", we will instead output all outputs of individual trees.
* If set to "leaf_id", we will output the integer ID of the leaf node
* for each tree.
* @param[in] specified_rows_per_block_iter If non-nullopt, this value is
* used to determine how many rows are evaluated for each inference
* iteration within a CUDA block. Runtime performance is quite sensitive
Expand Down Expand Up @@ -301,6 +310,8 @@ struct decision_forest {
raft_proto::buffer<node_type> nodes_;
/** The index of the root node for each tree in the forest */
raft_proto::buffer<index_type> root_node_indexes_;
/** Mapping to apply to node IDs. Only relevant when predict_type == infer_kind::leaf_id */
raft_proto::buffer<index_type> node_id_mapping_;
/** Buffer of outputs for all leaves in vector-leaf models */
std::optional<raft_proto::buffer<io_type>> vector_output_;
/** Buffer of elements used as backing data for bitsets which specify
Expand All @@ -323,6 +334,7 @@ struct decision_forest {
{
return forest_type{nodes_.data(),
root_node_indexes_.data(),
node_id_mapping_.data(),
static_cast<index_type>(root_node_indexes_.size()),
num_outputs_};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ struct decision_forest_builder {
if (cur_tree_size_ % alignment_ != index_type{}) {
auto padding = (alignment_ - cur_tree_size_ % alignment_);
for (auto i = index_type{}; i < padding; ++i) {
add_node(typename node_type::threshold_type{});
add_node(typename node_type::threshold_type{}, std::nullopt);
}
}
}
Expand All @@ -81,6 +81,7 @@ struct decision_forest_builder {
void add_categorical_node(
iter_t vec_begin,
iter_t vec_end,
std::optional<int> tl_node_id = std::nullopt,
bool default_to_distant_child = false,
typename node_type::metadata_storage_type feature = typename node_type::metadata_storage_type{},
typename node_type::offset_type offset = typename node_type::offset_type{})
Expand All @@ -100,12 +101,14 @@ struct decision_forest_builder {
auto set = bitset{set_storage, max_node_categories};
std::for_each(vec_begin, vec_end, [&set](auto&& cat_index) { set.set(cat_index); });

add_node(node_value, false, default_to_distant_child, true, feature, offset, false);
add_node(node_value, tl_node_id, false, default_to_distant_child, true, feature, offset, false);
}

/* Add a leaf node with vector output */
template <typename iter_t>
void add_leaf_vector_node(iter_t vec_begin, iter_t vec_end)
void add_leaf_vector_node(iter_t vec_begin,
iter_t vec_end,
std::optional<int> tl_node_id = std::nullopt)
{
auto leaf_index = typename node_type::index_type(vector_output_.size() / output_size_);
std::copy(vec_begin, vec_end, std::back_inserter(vector_output_));
Expand All @@ -115,13 +118,16 @@ struct decision_forest_builder {
false,
typename node_type::metadata_storage_type{},
typename node_type::offset_type{});
// 0 indicates the lack of ID mapping for a particular node
node_id_mapping_.push_back(static_cast<index_type>(tl_node_id.value_or(0)));
++cur_tree_size_;
}

/* Add a node to the model */
template <typename value_t>
void add_node(
value_t val,
std::optional<int> tl_node_id = std::nullopt,
bool is_leaf_node = true,
bool default_to_distant_child = false,
bool is_categorical_node = false,
Expand All @@ -132,6 +138,8 @@ struct decision_forest_builder {
if (is_inclusive) { val = std::nextafter(val, std::numeric_limits<value_t>::infinity()); }
nodes_.emplace_back(
val, is_leaf_node, default_to_distant_child, is_categorical_node, feature, offset);
// 0 indicates the lack of ID mapping for a particular node
node_id_mapping_.push_back(static_cast<index_type>(tl_node_id.value_or(0)));
++cur_tree_size_;
}

Expand Down Expand Up @@ -192,6 +200,10 @@ struct decision_forest_builder {
mem_type,
device,
stream},
raft_proto::buffer{raft_proto::buffer{node_id_mapping_.data(), node_id_mapping_.size()},
mem_type,
device,
stream},
num_feature,
num_class,
max_num_categories_ != 0,
Expand Down Expand Up @@ -234,6 +246,7 @@ struct decision_forest_builder {
std::vector<index_type> root_node_indexes_;
std::vector<typename node_type::threshold_type> vector_output_;
std::vector<typename node_type::index_type> categorical_storage_;
std::vector<index_type> node_id_mapping_;
};

} // namespace detail
Expand Down
117 changes: 106 additions & 11 deletions cpp/include/cuml/experimental/fil/detail/evaluate_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once
#include <stdint.h>
#include <type_traits>
#ifndef __CUDACC__
#include <math.h>
#endif
Expand All @@ -26,19 +27,33 @@ namespace fil {
namespace detail {

/*
* Evaluate a single tree on a single row
* Evaluate a single tree on a single row.
* If node_id_mapping_t is not std::nullptr_t (i.e. a node ID mapping array is
* passed), this function returns the mapped ID of the leaf node that was
* reached instead of the leaf value.
*
* @tparam has_vector_leaves Whether or not this tree has vector leaves
* @tparam has_categorical nodes Whether or not this tree has any nodes with
* @tparam has_categorical_nodes Whether or not this tree has any nodes with
* categorical splits
* @tparam node_t The type of nodes in this tree
* @tparam io_t The type used for input to and output from this tree (typically
* either floats or doubles)
* @tparam node_id_mapping_t If non-nullptr_t, this indicates the type we expect for
* node_id_mapping.
* @param node Pointer to the root node of this tree
* @param row Pointer to the input data for this row
* @param first_root_node Pointer to the root node of the first tree.
* @param node_id_mapping Array representing the mapping from internal node IDs to
* final leaf ID outputs
*/
template <bool has_vector_leaves, bool has_categorical_nodes, typename node_t, typename io_t>
HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node, io_t const* __restrict__ row)
template <bool has_vector_leaves,
bool has_categorical_nodes,
typename node_t,
typename io_t,
typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto evaluate_tree_impl(node_t const* __restrict__ node,
io_t const* __restrict__ row,
node_t const* __restrict__ first_root_node = nullptr,
node_id_mapping_t node_id_mapping = nullptr)
{
using categorical_set_type = bitset<uint32_t, typename node_t::index_type const>;
auto cur_node = *node;
Expand All @@ -60,12 +75,18 @@ HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node, io_t const* __re
node += cur_node.child_offset(condition);
cur_node = *node;
} while (!cur_node.is_leaf());
return cur_node.template output<has_vector_leaves>();
if constexpr (std::is_same_v<node_id_mapping_t, std::nullptr_t>) {
return cur_node.template output<has_vector_leaves>();
} else {
return node_id_mapping[node - first_root_node];
}
}

/*
* Evaluate a single tree which requires external categorical storage on a
* single node
* single node.
* If node_id_mapping_t is not std::nullptr_t (i.e. a node ID mapping array is
* passed), this function returns the mapped ID of the leaf node that was
* reached instead of the leaf value.
*
* For non-categorical models and models with a relatively small number of
* categories for any feature, all information necessary for model evaluation
Expand All @@ -81,15 +102,23 @@ HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node, io_t const* __re
* either floats or doubles)
* @tparam categorical_storage_t The underlying type used for storing
* categorical data (typically char)
* @tparam node_id_mapping_t If non-nullptr_t, this indicates the type we expect for
* node_id_mapping.
* @param node Pointer to the root node of this tree
* @param row Pointer to the input data for this row
* @param categorical_storage Pointer to where categorical split data is
* stored.
*/
template <bool has_vector_leaves, typename node_t, typename io_t, typename categorical_storage_t>
HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node,
io_t const* __restrict__ row,
categorical_storage_t const* __restrict__ categorical_storage)
template <bool has_vector_leaves,
typename node_t,
typename io_t,
typename categorical_storage_t,
typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto evaluate_tree_impl(node_t const* __restrict__ node,
io_t const* __restrict__ row,
categorical_storage_t const* __restrict__ categorical_storage,
node_t const* __restrict__ first_root_node = nullptr,
node_id_mapping_t node_id_mapping = nullptr)
{
using categorical_set_type = bitset<uint32_t, categorical_storage_t const>;
auto cur_node = *node;
Expand All @@ -109,7 +138,73 @@ HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node,
node += cur_node.child_offset(condition);
cur_node = *node;
} while (!cur_node.is_leaf());
return cur_node.template output<has_vector_leaves>();
if constexpr (std::is_same_v<node_id_mapping_t, std::nullptr_t>) {
return cur_node.template output<has_vector_leaves>();
} else {
return node_id_mapping[node - first_root_node];
}
}

/**
 * Evaluate one tree of a forest on one input row, selecting the matching
 * evaluate_tree_impl overload at compile time.
 *
 * When predict_leaf is true, the result is the entry of the forest's node ID
 * mapping that corresponds to the leaf reached for this row, returned as
 * index_type. Otherwise the result is the output of that leaf, returned as
 * node_type::index_type for vector-leaf trees and node_type::threshold_type
 * for all other trees.
 *
 * @tparam has_vector_leaves Whether or not this tree has vector leaves
 * @tparam has_categorical_nodes Whether or not this tree has any nodes with
 * categorical splits. Only forwarded when has_nonlocal_categories is false.
 * @tparam has_nonlocal_categories Whether or not this tree has any nodes that
 * store categorical split data externally. When true, categorical_data is
 * forwarded to evaluate_tree_impl; when false, categorical_data is unused.
 * @tparam predict_leaf Whether to return leaf IDs rather than leaf outputs
 * @tparam forest_t The type of the forest
 * @tparam io_t The type used for input to and output from this tree
 * (typically either floats or doubles)
 * @tparam categorical_data_t The type for non-local categorical data storage
 * @param forest The forest used to perform inference
 * @param tree_index The index of the tree to evaluate
 * @param row The data row to evaluate
 * @param categorical_data Pointer to where non-local data on categorical
 * splits is stored
 */
template <bool has_vector_leaves,
          bool has_categorical_nodes,
          bool has_nonlocal_categories,
          bool predict_leaf,
          typename forest_t,
          typename io_t,
          typename categorical_data_t>
HOST DEVICE auto evaluate_tree(forest_t const& forest,
                               index_type tree_index,
                               io_t const* __restrict__ row,
                               categorical_data_t categorical_data)
{
  using node_t = typename forest_t::node_type;
  // Root of the tree being evaluated; every branch below starts from it.
  auto tree_root = forest.get_tree_root(tree_index);

  if constexpr (!predict_leaf) {
    // Pin the return type explicitly so it does not depend on what the
    // selected evaluate_tree_impl overload happens to return.
    using output_t = std::conditional_t<has_vector_leaves,
                                        typename node_t::index_type,
                                        typename node_t::threshold_type>;
    auto leaf_output = output_t{};
    if constexpr (!has_nonlocal_categories) {
      leaf_output =
        evaluate_tree_impl<has_vector_leaves, has_categorical_nodes>(tree_root, row);
    } else {
      leaf_output = evaluate_tree_impl<has_vector_leaves>(tree_root, row, categorical_data);
    }
    return leaf_output;
  } else {
    // Leaf-ID prediction: the impl indexes the mapping by the reached node's
    // offset from the root of the first tree.
    auto first_root     = forest.get_tree_root(0);
    auto id_mapping     = forest.get_node_id_mapping();
    auto mapped_leaf_id = index_type{};
    if constexpr (!has_nonlocal_categories) {
      mapped_leaf_id = evaluate_tree_impl<has_vector_leaves, has_categorical_nodes>(
        tree_root, row, first_root, id_mapping);
    } else {
      mapped_leaf_id = evaluate_tree_impl<has_vector_leaves>(
        tree_root, row, categorical_data, first_root, id_mapping);
    }
    return mapped_leaf_id;
  }
}

} // namespace detail
Expand Down
7 changes: 7 additions & 0 deletions cpp/include/cuml/experimental/fil/detail/forest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ struct forest {

HOST DEVICE forest(node_type* forest_nodes,
index_type* forest_root_indexes,
index_type* node_id_mapping,
index_type num_trees,
index_type num_outputs)
: nodes_{forest_nodes},
root_node_indexes_{forest_root_indexes},
node_id_mapping_{node_id_mapping},
num_trees_{num_trees},
num_outputs_{num_outputs}
{
Expand All @@ -56,6 +58,10 @@ struct forest {
return nodes_ + root_node_indexes_[tree_index];
}

/* Return pointer to the mapping from internal node IDs to final node ID outputs.
 * This is the node_id_mapping pointer given to the constructor, exposed here as
 * a pointer-to-const so callers cannot modify the mapping through it.
 * Only used when infer_type == infer_kind::leaf_id */
HOST DEVICE const auto* get_node_id_mapping() const { return node_id_mapping_; }

/* Return the number of trees in this forest (the num_trees value given to the
 * constructor) */
HOST DEVICE auto tree_count() const { return num_trees_; }

Expand All @@ -66,6 +72,7 @@ struct forest {
private:
node_type* nodes_;
index_type* root_node_indexes_;
index_type* node_id_mapping_;
index_type num_trees_;
index_type num_outputs_;
};
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cuml/experimental/fil/detail/infer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ namespace detail {
* required
* @param infer_type Type of inference to perform. Defaults to summing the outputs of all trees
* and produce an output per row. If set to "per_tree", we will instead output all outputs of
* individual trees.
* individual trees. If set to "leaf_id", we will output the integer ID of the leaf node
* for each tree.
* @param specified_chunk_size If non-nullopt, the size of "mini-batches"
* used for distributing work across threads
* @param device The device on which to execute evaluation
Expand Down
Loading

0 comments on commit c2d139a

Please sign in to comment.