Loading [MathJax]/extensions/tex2jax.js
cuML C++ API  23.12
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
decision_forest.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 #include <algorithm>
18 #include <cstddef>
33 #include <limits>
34 #include <optional>
35 #include <stddef.h>
36 #include <stdint.h>
37 #include <variant>
38 
39 namespace ML {
40 namespace experimental {
41 namespace fil {
42 
66 template <tree_layout layout_v,
67  typename threshold_t,
68  typename index_t,
69  typename metadata_storage_t,
70  typename offset_t>
75  auto constexpr static const layout = layout_v;
88  using io_type = typename forest_type::io_type;
92  using threshold_type = threshold_t;
101 
106  : nodes_{},
107  root_node_indexes_{},
108  node_id_mapping_{},
109  vector_output_{},
110  categorical_storage_{},
111  num_features_{},
112  num_outputs_{},
113  leaf_size_{},
114  has_categorical_nodes_{false},
115  row_postproc_{},
116  elem_postproc_{},
117  average_factor_{},
118  bias_{},
119  postproc_constant_{}
120  {
121  }
122 
160  raft_proto::buffer<index_type>&& root_node_indexes,
161  raft_proto::buffer<index_type>&& node_id_mapping,
164  bool has_categorical_nodes = false,
165  std::optional<raft_proto::buffer<io_type>>&& vector_output = std::nullopt,
166  std::optional<raft_proto::buffer<typename node_type::index_type>>&&
167  categorical_storage = std::nullopt,
168  index_type leaf_size = index_type{1},
169  row_op row_postproc = row_op::disable,
170  element_op elem_postproc = element_op::disable,
171  io_type average_factor = io_type{1},
172  io_type bias = io_type{0},
173  io_type postproc_constant = io_type{1})
174  : nodes_{nodes},
175  root_node_indexes_{root_node_indexes},
176  node_id_mapping_{node_id_mapping},
177  vector_output_{vector_output},
178  categorical_storage_{categorical_storage},
179  num_features_{num_features},
180  num_outputs_{num_outputs},
181  leaf_size_{leaf_size},
182  has_categorical_nodes_{has_categorical_nodes},
183  row_postproc_{row_postproc},
184  elem_postproc_{elem_postproc},
185  average_factor_{average_factor},
186  bias_{bias},
187  postproc_constant_{postproc_constant}
188  {
189  if (nodes.memory_type() != root_node_indexes.memory_type()) {
191  "Nodes and indexes of forest must both be stored on either host or device");
192  }
193  if (nodes.device_index() != root_node_indexes.device_index()) {
195  "Nodes and indexes of forest must both be stored on same device");
196  }
197  detail::initialize_device<forest_type>(nodes.device());
198  }
199 
201  auto num_features() const { return num_features_; }
203  auto num_trees() const { return root_node_indexes_.size(); }
205  auto has_vector_leaves() const { return vector_output_.has_value(); }
206 
209  auto num_outputs(infer_kind inference_kind = infer_kind::default_kind) const
210  {
211  auto result = num_outputs_;
212  if (inference_kind == infer_kind::per_tree) {
213  result = num_trees();
214  if (has_vector_leaves()) { result *= num_outputs_; }
215  } else if (inference_kind == infer_kind::leaf_id) {
216  result = num_trees();
217  }
218  return result;
219  }
220 
222  auto row_postprocessing() const { return row_postproc_; }
223  // Setter for row_postprocessing
224  void set_row_postprocessing(row_op val) { row_postproc_ = val; }
227  auto elem_postprocessing() const { return elem_postproc_; }
228 
230  auto memory_type() { return nodes_.memory_type(); }
232  auto device_index() { return nodes_.device_index(); }
233 
259  infer_kind predict_type = infer_kind::default_kind,
260  std::optional<index_type> specified_rows_per_block_iter = std::nullopt)
261  {
262  if (output.memory_type() != memory_type() || input.memory_type() != memory_type()) {
264  "Tried to use host I/O data with model on device or vice versa"};
265  }
266  if (output.device_index() != device_index() || input.device_index() != device_index()) {
267  throw raft_proto::wrong_device{"I/O data on different device than model"};
268  }
269  auto* vector_output_data =
270  (vector_output_.has_value() ? vector_output_->data() : static_cast<io_type*>(nullptr));
271  auto* categorical_storage_data =
272  (categorical_storage_.has_value() ? categorical_storage_->data()
273  : static_cast<categorical_storage_type*>(nullptr));
274  switch (nodes_.device().index()) {
275  case 0:
276  fil::detail::infer(obj(),
277  get_postprocessor(predict_type),
278  output.data(),
279  input.data(),
280  index_type(input.size() / num_features_),
281  num_features_,
282  num_outputs(predict_type),
283  has_categorical_nodes_,
284  vector_output_data,
285  categorical_storage_data,
286  predict_type,
287  specified_rows_per_block_iter,
288  std::get<0>(nodes_.device()),
289  stream);
290  break;
291  case 1:
292  fil::detail::infer(obj(),
293  get_postprocessor(predict_type),
294  output.data(),
295  input.data(),
296  index_type(input.size() / num_features_),
297  num_features_,
298  num_outputs(predict_type),
299  has_categorical_nodes_,
300  vector_output_data,
301  categorical_storage_data,
302  predict_type,
303  specified_rows_per_block_iter,
304  std::get<1>(nodes_.device()),
305  stream);
306  break;
307  }
308  }
309 
310  private:
314  raft_proto::buffer<index_type> root_node_indexes_;
316  raft_proto::buffer<index_type> node_id_mapping_;
318  std::optional<raft_proto::buffer<io_type>> vector_output_;
321  std::optional<raft_proto::buffer<categorical_storage_type>> categorical_storage_;
322 
323  // Metadata
324  index_type num_features_;
325  index_type num_outputs_;
326  index_type leaf_size_;
327  bool has_categorical_nodes_ = false;
328  // Postprocessing constants
329  row_op row_postproc_;
330  element_op elem_postproc_;
331  io_type average_factor_;
332  io_type bias_;
333  io_type postproc_constant_;
334 
335  auto obj() const
336  {
337  return forest_type{nodes_.data(),
338  root_node_indexes_.data(),
339  node_id_mapping_.data(),
340  static_cast<index_type>(root_node_indexes_.size()),
341  num_outputs_};
342  }
343 
344  auto get_postprocessor(infer_kind inference_kind = infer_kind::default_kind) const
345  {
346  auto result = postprocessor_type{};
347  if (inference_kind == infer_kind::default_kind) {
348  result = postprocessor_type{
349  row_postproc_, elem_postproc_, average_factor_, bias_, postproc_constant_};
350  }
351  return result;
352  }
353 
354  auto leaf_size() const { return leaf_size_; }
355 };
356 
357 namespace detail {
371 template <tree_layout layout, bool double_precision, bool large_trees>
373  layout,
378 
379 } // namespace detail
380 
383  std::variant<detail::preset_decision_forest<
384  std::variant_alternative_t<0, detail::specialization_variant>::layout,
385  std::variant_alternative_t<0, detail::specialization_variant>::is_double_precision,
386  std::variant_alternative_t<0, detail::specialization_variant>::has_large_trees>,
388  std::variant_alternative_t<1, detail::specialization_variant>::layout,
389  std::variant_alternative_t<1, detail::specialization_variant>::is_double_precision,
390  std::variant_alternative_t<1, detail::specialization_variant>::has_large_trees>,
392  std::variant_alternative_t<2, detail::specialization_variant>::layout,
393  std::variant_alternative_t<2, detail::specialization_variant>::is_double_precision,
394  std::variant_alternative_t<2, detail::specialization_variant>::has_large_trees>,
396  std::variant_alternative_t<3, detail::specialization_variant>::layout,
397  std::variant_alternative_t<3, detail::specialization_variant>::is_double_precision,
398  std::variant_alternative_t<3, detail::specialization_variant>::has_large_trees>,
400  std::variant_alternative_t<4, detail::specialization_variant>::layout,
401  std::variant_alternative_t<4, detail::specialization_variant>::is_double_precision,
402  std::variant_alternative_t<4, detail::specialization_variant>::has_large_trees>,
404  std::variant_alternative_t<5, detail::specialization_variant>::layout,
405  std::variant_alternative_t<5, detail::specialization_variant>::is_double_precision,
406  std::variant_alternative_t<5, detail::specialization_variant>::has_large_trees>,
408  std::variant_alternative_t<6, detail::specialization_variant>::layout,
409  std::variant_alternative_t<6, detail::specialization_variant>::is_double_precision,
410  std::variant_alternative_t<6, detail::specialization_variant>::has_large_trees>,
412  std::variant_alternative_t<7, detail::specialization_variant>::layout,
413  std::variant_alternative_t<7, detail::specialization_variant>::is_double_precision,
414  std::variant_alternative_t<7, detail::specialization_variant>::has_large_trees>>;
415 
/**
 * Compute a packed selector index from the characteristics of a model.
 *
 * The returned value is
 *   (layout_value << 2) + (double_precision << 1) + large_trees
 * where layout_value is the underlying integer of `layout`, and the two flags
 * are derived below. Presumably this index selects the matching alternative
 * of decision_forest_variant -- confirm against the callers.
 *
 * @param use_double_thresholds Forces the double-precision flag on when true.
 * @param max_node_offset Sets the large-trees flag when it exceeds the
 * maximum of small_offset_t.
 * @param num_features Sets the large-trees flag when it exceeds the maximum
 * of small_metadata_t shifted right by reserved_node_metadata_bits.
 * @param num_categorical_nodes Used in the categorical storage estimate.
 * @param max_num_categories Only contributes when it exceeds the bit width
 * of small_index_t.
 * @param num_vector_leaves Sets the double-precision flag when it exceeds the
 * maximum of small_index_t.
 * @param layout Tree layout whose underlying value forms the high bits.
 *
 * NOTE(review): the right-hand sides of the small_index_t, small_metadata_t
 * and small_offset_t aliases are not visible in this listing.
 */
434 inline auto get_forest_variant_index(bool use_double_thresholds,
435  index_type max_node_offset,
436  index_type num_features,
437  index_type num_categorical_nodes = index_type{},
438  index_type max_num_categories = index_type{},
439  index_type num_vector_leaves = index_type{},
440  tree_layout layout = preferred_tree_layout)
441 {
442  using small_index_t =
// Number of bits in small_index_t
444  auto max_local_categories = index_type(sizeof(small_index_t) * 8);
445  // If the index required for pointing to categorical storage bins or vector
446  // leaf output exceeds what we can store in a uint32_t, uint64_t will be used
447  //
448  // TODO(wphicks): We are overestimating categorical storage required here
// NOTE(review): `+ 1 * num_categorical_nodes` parses as
// `ceildiv(...) + (1 * num_categorical_nodes)`. If
// `(ceildiv(...) + 1) * num_categorical_nodes` was intended, parentheses are
// missing -- confirm before changing.
449  auto double_indexes_required =
450  (max_num_categories > max_local_categories &&
451  ((raft_proto::ceildiv(max_num_categories, max_local_categories) + 1 * num_categorical_nodes) >
453  num_vector_leaves > std::numeric_limits<small_index_t>::max();
454 
// Double precision is selected either on request or because wider indexes are needed.
455  auto double_precision = use_double_thresholds || double_indexes_required;
456 
457  using small_metadata_t =
459  using small_offset_t =
461 
// Large trees: the feature count does not fit in the small metadata type once
// reserved_node_metadata_bits are set aside, or an offset does not fit in the
// small offset type.
462  auto large_trees =
463  (num_features > (std::numeric_limits<small_metadata_t>::max() >> reserved_node_metadata_bits) ||
464  max_node_offset > std::numeric_limits<small_offset_t>::max());
465 
466  auto layout_value = static_cast<std::underlying_type_t<tree_layout>>(layout);
467 
// Pack: layout in bits 2 and above, double_precision in bit 1, large_trees in bit 0.
468  return ((index_type{layout_value} << index_type{2}) +
469  (index_type{double_precision} << index_type{1}) + index_type{large_trees});
470 }
471 } // namespace fil
472 } // namespace experimental
473 } // namespace ML
math_t max(math_t a, math_t b)
Definition: learning_rate.h:26
void infer(forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type *input, index_type row_count, index_type col_count, index_type output_count, bool has_categorical_nodes, typename forest_t::io_type *vector_output=nullptr, typename forest_t::node_type::index_type *categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt, raft_proto::device_id< D > device=raft_proto::device_id< D >{}, raft_proto::cuda_stream stream=raft_proto::cuda_stream{})
Definition: infer.hpp:66
tree_layout
Definition: tree_layout.hpp:20
element_op
Definition: postproc_ops.hpp:29
uint32_t index_type
Definition: index_type.hpp:21
infer_kind
Definition: infer_kind.hpp:20
auto get_forest_variant_index(bool use_double_thresholds, index_type max_node_offset, index_type num_features, index_type num_categorical_nodes=index_type{}, index_type max_num_categories=index_type{}, index_type num_vector_leaves=index_type{}, tree_layout layout=preferred_tree_layout)
Definition: decision_forest.hpp:434
std::variant< detail::preset_decision_forest< std::variant_alternative_t< 0, detail::specialization_variant >::layout, std::variant_alternative_t< 0, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 0, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 1, detail::specialization_variant >::layout, std::variant_alternative_t< 1, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 1, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 2, detail::specialization_variant >::layout, std::variant_alternative_t< 2, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 2, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 3, detail::specialization_variant >::layout, std::variant_alternative_t< 3, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 3, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 4, detail::specialization_variant >::layout, std::variant_alternative_t< 4, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 4, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 5, detail::specialization_variant >::layout, std::variant_alternative_t< 5, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 5, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 6, detail::specialization_variant >::layout, std::variant_alternative_t< 6, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 6, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 
7, detail::specialization_variant >::layout, std::variant_alternative_t< 7, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 7, detail::specialization_variant >::has_large_trees > > decision_forest_variant
Definition: decision_forest.hpp:414
row_op
Definition: postproc_ops.hpp:22
Definition: dbscan.hpp:27
HOST DEVICE constexpr auto ceildiv(T dividend, U divisor)
Definition: ceildiv.hpp:21
int cuda_stream
Definition: cuda_stream.hpp:25
Definition: decision_forest.hpp:71
typename forest_type::node_type node_type
Definition: decision_forest.hpp:84
auto row_postprocessing() const
Definition: decision_forest.hpp:222
auto device_index()
Definition: decision_forest.hpp:232
constexpr static auto const layout
Definition: decision_forest.hpp:75
auto has_vector_leaves() const
Definition: decision_forest.hpp:205
auto num_outputs(infer_kind inference_kind=infer_kind::default_kind) const
Definition: decision_forest.hpp:209
auto elem_postprocessing() const
Definition: decision_forest.hpp:227
postprocessor< io_type > postprocessor_type
Definition: decision_forest.hpp:96
forest< layout, threshold_t, index_t, metadata_storage_t, offset_t > forest_type
Definition: decision_forest.hpp:80
void predict(raft_proto::buffer< typename forest_type::io_type > &output, raft_proto::buffer< typename forest_type::io_type > const &input, raft_proto::cuda_stream stream=raft_proto::cuda_stream{}, infer_kind predict_type=infer_kind::default_kind, std::optional< index_type > specified_rows_per_block_iter=std::nullopt)
Definition: decision_forest.hpp:256
auto num_features() const
Definition: decision_forest.hpp:201
decision_forest(raft_proto::buffer< node_type > &&nodes, raft_proto::buffer< index_type > &&root_node_indexes, raft_proto::buffer< index_type > &&node_id_mapping, index_type num_features, index_type num_outputs=index_type{2}, bool has_categorical_nodes=false, std::optional< raft_proto::buffer< io_type >> &&vector_output=std::nullopt, std::optional< raft_proto::buffer< typename node_type::index_type >> &&categorical_storage=std::nullopt, index_type leaf_size=index_type{1}, row_op row_postproc=row_op::disable, element_op elem_postproc=element_op::disable, io_type average_factor=io_type{1}, io_type bias=io_type{0}, io_type postproc_constant=io_type{1})
Definition: decision_forest.hpp:159
typename forest_type::io_type io_type
Definition: decision_forest.hpp:88
void set_row_postprocessing(row_op val)
Definition: decision_forest.hpp:224
decision_forest()
Definition: decision_forest.hpp:105
auto num_trees() const
Definition: decision_forest.hpp:203
threshold_t threshold_type
Definition: decision_forest.hpp:92
typename node_type::index_type categorical_storage_type
Definition: decision_forest.hpp:100
auto memory_type()
Definition: decision_forest.hpp:230
std::conditional_t< large_trees, std::uint32_t, std::uint16_t > metadata_type
Definition: specialization_types.hpp:51
std::conditional_t< double_precision, std::uint64_t, std::uint32_t > index_type
Definition: specialization_types.hpp:49
std::conditional_t< double_precision, double, float > threshold_type
Definition: specialization_types.hpp:45
std::conditional_t< large_trees, std::uint32_t, std::uint16_t > offset_type
Definition: specialization_types.hpp:53
Definition: forest.hpp:34
threshold_t io_type
Definition: forest.hpp:36
node< layout_v, threshold_t, index_t, metadata_storage_t, offset_t > node_type
Definition: forest.hpp:35
Definition: postprocessor.hpp:137
auto size() const noexcept
Definition: buffer.hpp:291
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:292
auto memory_type() const noexcept
Definition: buffer.hpp:293
auto device_index() const noexcept
Definition: buffer.hpp:306
auto device() const noexcept
Definition: buffer.hpp:304
Definition: exceptions.hpp:49
Definition: exceptions.hpp:38
Definition: exceptions.hpp:58