Loading [MathJax]/extensions/tex2jax.js
cuML C++ API  23.12
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
randomforest.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cuml/common/logger.hpp>
22 
23 #include <map>
24 #include <memory>
25 
// Only the name raft::handle_t is introduced here; the declarations visible in this listing
// take it by const reference.
26 namespace raft {
27 class handle_t; // forward declaration
28 }
29 
30 namespace ML {
31 
32 enum RF_type {
35 };
36 
38 
39 struct RF_metrics {
41 
42  // Classification metrics
43  float accuracy;
44 
45  // Regression metrics
49 };
50 
52  float accuracy,
53  double mean_abs_error,
54  double mean_squared_error,
55  double median_abs_error);
/**
 * Returns an RF_metrics value built from the three regression error measures passed in.
 * NOTE(review): how the classification field (accuracy) and rf_type are filled in is not
 * visible in this header - confirm against the implementation.
 *
 * @param mean_abs_error      mean absolute error
 * @param mean_squared_error  mean squared error
 * @param median_abs_error    median absolute error
 */
57 RF_metrics set_rf_metrics_regression(double mean_abs_error,
58  double mean_squared_error,
59  double median_abs_error);
/**
 * Takes an RF_metrics by value and returns nothing.
 * NOTE(review): presumably prints/logs the metric fields - the output destination is not
 * visible in this header; confirm against the implementation.
 */
60 void print(const RF_metrics rf_metrics);
61 
62 struct RF_params {
66  int n_trees;
77  bool bootstrap;
81  float max_samples;
88  uint64_t seed;
94  int n_streams;
96 };
97 
98 /* Update `labels` so that its unique values are renumbered from 0 to n_unique_vals,
99  and create the old_label -> new_label map for this random forest (presumably stored in
100  `labels_map` - confirm). `verbosity` defaults to CUML_LEVEL_INFO. */
101 void preprocess_labels(int n_rows,
102  std::vector<int>& labels,
103  std::map<int, int>& labels_map,
104  int verbosity = CUML_LEVEL_INFO);
105 
106 /* Revert the effect of preprocess_labels on `labels`, if needed. NOTE(review): presumably `labels_map` is the old_label -> new_label map built by preprocess_labels - confirm. `verbosity` defaults to CUML_LEVEL_INFO. */
107 void postprocess_labels(int n_rows,
108  std::vector<int>& labels,
109  std::map<int, int>& labels_map,
110  int verbosity = CUML_LEVEL_INFO);
111 
112 template <class T, class L>
114  std::vector<std::shared_ptr<DT::TreeMetaDataNode<T, L>>> trees;
116 };
117 
118 template <class T, class L>
120 
121 template <class T, class L>
123 
124 template <class T, class L>
126 
/**
 * Returns a std::string for the given forest, which is read through a pointer-to-const.
 * NOTE(review): judging by the name the string is a JSON dump of the forest - the exact
 * format is not visible in this header; confirm against the implementation.
 *
 * @tparam T  first template argument of RandomForestMetaData
 * @tparam L  second template argument of RandomForestMetaData
 */
127 template <class T, class L>
128 std::string get_rf_json(const RandomForestMetaData<T, L>* forest);
129 
130 template <class T, class L>
132  const RandomForestMetaData<T, L>* forest,
133  int num_features);
134 
/**
 * Takes a vector of ModelHandle values (by value) and returns a single ModelHandle.
 * NOTE(review): presumably the returned handle refers to a model combining the trees of all
 * input handles; ownership of the returned handle is not visible here - confirm.
 */
135 ModelHandle concatenate_trees(std::vector<ModelHandle> treelite_handles);
136 
138  std::vector<ModelHandle> treelite_handles);
139 // ----------------------------- Classification ----------------------------------- //
140 
143 
/**
 * Classifier fit(), single-precision overload: `input` is float* and `labels` is int*.
 *
 * @param user_handle      raft handle, taken by const reference
 * @param forest           reference to the caller's RandomForestClassifierF pointer
 *                         (NOTE(review): whether the callee allocates or only fills it is
 *                         not visible here - confirm)
 * @param input            feature values (presumably n_rows x n_cols; layout and memory
 *                         space are not visible here - TODO confirm)
 * @param n_rows           row count
 * @param n_cols           column count
 * @param labels           integer labels (presumably one per row - TODO confirm)
 * @param n_unique_labels  number of unique label values
 * @param rf_params        forest parameters, passed by value
 * @param verbosity        defaults to CUML_LEVEL_INFO
 */
144 void fit(const raft::handle_t& user_handle,
145  RandomForestClassifierF*& forest,
146  float* input,
147  int n_rows,
148  int n_cols,
149  int* labels,
150  int n_unique_labels,
151  RF_params rf_params,
152  int verbosity = CUML_LEVEL_INFO);
/**
 * Classifier fit(), double-precision overload: `forest` is RandomForestClassifierD*& and
 * `input` is double*; `labels` stays int*. The remaining parameters have the same names and
 * types as in the float overload, and `verbosity` defaults to CUML_LEVEL_INFO.
 */
153 void fit(const raft::handle_t& user_handle,
154  RandomForestClassifierD*& forest,
155  double* input,
156  int n_rows,
157  int n_cols,
158  int* labels,
159  int n_unique_labels,
160  RF_params rf_params,
161  int verbosity = CUML_LEVEL_INFO);
162 
/**
 * Classifier predict(), single-precision overload.
 *
 * @param user_handle  raft handle, taken by const reference
 * @param forest       forest to read (pointer-to-const)
 * @param input        read-only float feature values (presumably n_rows x n_cols - TODO confirm)
 * @param n_rows       row count
 * @param n_cols       column count
 * @param predictions  non-const int* (presumably the output buffer, one entry per row -
 *                     TODO confirm)
 * @param verbosity    defaults to CUML_LEVEL_INFO
 */
163 void predict(const raft::handle_t& user_handle,
164  const RandomForestClassifierF* forest,
165  const float* input,
166  int n_rows,
167  int n_cols,
168  int* predictions,
169  int verbosity = CUML_LEVEL_INFO);
/**
 * Classifier predict(), double-precision overload: `forest` is const RandomForestClassifierD*
 * and `input` is const double*; `predictions` stays int*. `verbosity` defaults to
 * CUML_LEVEL_INFO.
 */
170 void predict(const raft::handle_t& user_handle,
171  const RandomForestClassifierD* forest,
172  const double* input,
173  int n_rows,
174  int n_cols,
175  int* predictions,
176  int verbosity = CUML_LEVEL_INFO);
177 
/**
 * Classifier score(), overload taking a RandomForestClassifierF. Returns an RF_metrics.
 *
 * @param user_handle  raft handle, taken by const reference
 * @param forest       forest to read (pointer-to-const)
 * @param ref_labels   read-only reference labels (int)
 * @param n_rows       row count (presumably the length of both ref_labels and predictions -
 *                     TODO confirm)
 * @param predictions  read-only predicted labels (int)
 * @param verbosity    defaults to CUML_LEVEL_INFO
 */
178 RF_metrics score(const raft::handle_t& user_handle,
179  const RandomForestClassifierF* forest,
180  const int* ref_labels,
181  int n_rows,
182  const int* predictions,
183  int verbosity = CUML_LEVEL_INFO);
/**
 * Classifier score(), overload taking a RandomForestClassifierD. The label and prediction
 * pointers are const int* exactly as in the RandomForestClassifierF overload; only the
 * forest type differs. Returns an RF_metrics; `verbosity` defaults to CUML_LEVEL_INFO.
 */
184 RF_metrics score(const raft::handle_t& user_handle,
185  const RandomForestClassifierD* forest,
186  const int* ref_labels,
187  int n_rows,
188  const int* predictions,
189  int verbosity = CUML_LEVEL_INFO);
190 
/**
 * Returns an RF_params value built from the individual settings passed in. All fourteen
 * arguments are required (none has a default).
 *
 * NOTE(review): bootstrap, n_trees, max_samples and seed share their names with RF_params
 * members; cfg_n_streams presumably populates RF_params::n_streams, and the remaining
 * tree-level settings presumably go into RF_params::tree_params - the mapping is not
 * visible in this header; confirm against the implementation.
 */
191 RF_params set_rf_params(int max_depth,
192  int max_leaves,
193  float max_features,
194  int max_n_bins,
195  int min_samples_leaf,
196  int min_samples_split,
197  float min_impurity_decrease,
198  bool bootstrap,
199  int n_trees,
200  float max_samples,
201  uint64_t seed,
202  CRITERION split_criterion,
203  int cfg_n_streams,
204  int max_batch_size);
205 
206 // ----------------------------- Regression ----------------------------------- //
207 
210 
/**
 * Regressor fit(), single-precision overload: `input` and `labels` are both float*.
 * Unlike the classifier fit() overloads there is no n_unique_labels parameter.
 *
 * @param user_handle  raft handle, taken by const reference
 * @param forest       reference to the caller's RandomForestRegressorF pointer
 *                     (NOTE(review): whether the callee allocates or only fills it is not
 *                     visible here - confirm)
 * @param input        feature values (presumably n_rows x n_cols; layout and memory space
 *                     are not visible here - TODO confirm)
 * @param n_rows       row count
 * @param n_cols       column count
 * @param labels       float target values (presumably one per row - TODO confirm)
 * @param rf_params    forest parameters, passed by value
 * @param verbosity    defaults to CUML_LEVEL_INFO
 */
211 void fit(const raft::handle_t& user_handle,
212  RandomForestRegressorF*& forest,
213  float* input,
214  int n_rows,
215  int n_cols,
216  float* labels,
217  RF_params rf_params,
218  int verbosity = CUML_LEVEL_INFO);
/**
 * Regressor fit(), double-precision overload: `forest` is RandomForestRegressorD*& and both
 * `input` and `labels` are double*. The remaining parameters have the same names and types
 * as in the float overload, and `verbosity` defaults to CUML_LEVEL_INFO.
 */
219 void fit(const raft::handle_t& user_handle,
220  RandomForestRegressorD*& forest,
221  double* input,
222  int n_rows,
223  int n_cols,
224  double* labels,
225  RF_params rf_params,
226  int verbosity = CUML_LEVEL_INFO);
227 
/**
 * Regressor predict(), single-precision overload.
 *
 * @param user_handle  raft handle, taken by const reference
 * @param forest       forest to read (pointer-to-const)
 * @param input        read-only float feature values (presumably n_rows x n_cols - TODO confirm)
 * @param n_rows       row count
 * @param n_cols       column count
 * @param predictions  non-const float* (presumably the output buffer, one entry per row -
 *                     TODO confirm)
 * @param verbosity    defaults to CUML_LEVEL_INFO
 */
228 void predict(const raft::handle_t& user_handle,
229  const RandomForestRegressorF* forest,
230  const float* input,
231  int n_rows,
232  int n_cols,
233  float* predictions,
234  int verbosity = CUML_LEVEL_INFO);
/**
 * Regressor predict(), double-precision overload: `forest` is const RandomForestRegressorD*,
 * `input` is const double* and `predictions` is double*. `verbosity` defaults to
 * CUML_LEVEL_INFO.
 */
235 void predict(const raft::handle_t& user_handle,
236  const RandomForestRegressorD* forest,
237  const double* input,
238  int n_rows,
239  int n_cols,
240  double* predictions,
241  int verbosity = CUML_LEVEL_INFO);
242 
/**
 * Regressor score(), single-precision overload. Returns an RF_metrics.
 *
 * @param user_handle  raft handle, taken by const reference
 * @param forest       forest to read (pointer-to-const)
 * @param ref_labels   read-only reference target values (float)
 * @param n_rows       row count (presumably the length of both ref_labels and predictions -
 *                     TODO confirm)
 * @param predictions  read-only predicted values (float)
 * @param verbosity    defaults to CUML_LEVEL_INFO
 */
243 RF_metrics score(const raft::handle_t& user_handle,
244  const RandomForestRegressorF* forest,
245  const float* ref_labels,
246  int n_rows,
247  const float* predictions,
248  int verbosity = CUML_LEVEL_INFO);
/**
 * Regressor score(), double-precision overload: `forest` is const RandomForestRegressorD*
 * and both `ref_labels` and `predictions` are const double*. Returns an RF_metrics;
 * `verbosity` defaults to CUML_LEVEL_INFO.
 */
249 RF_metrics score(const raft::handle_t& user_handle,
250  const RandomForestRegressorD* forest,
251  const double* ref_labels,
252  int n_rows,
253  const double* predictions,
254  int verbosity = CUML_LEVEL_INFO);
255 }; // namespace ML
#define CUML_LEVEL_INFO
Definition: log_levels.hpp:28
Definition: dbscan.hpp:27
void postprocess_labels(int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO)
void predict(const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const float *input, int n_rows, int n_cols, int *predictions, int verbosity=CUML_LEVEL_INFO)
std::string get_rf_json(const RandomForestMetaData< T, L > *forest)
void fit(const raft::handle_t &user_handle, RandomForestClassifierF *&forest, float *input, int n_rows, int n_cols, int *labels, int n_unique_labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO)
std::string get_rf_summary_text(const RandomForestMetaData< T, L > *forest)
RandomForestMetaData< float, int > RandomForestClassifierF
Definition: randomforest.hpp:141
void print(const RF_metrics rf_metrics)
void delete_rf_metadata(RandomForestMetaData< T, L > *forest)
RF_type
Definition: randomforest.hpp:32
@ REGRESSION
Definition: randomforest.hpp:34
@ CLASSIFICATION
Definition: randomforest.hpp:33
RF_metrics set_all_rf_metrics(RF_type rf_type, float accuracy, double mean_abs_error, double mean_squared_error, double median_abs_error)
RandomForestMetaData< double, double > RandomForestRegressorD
Definition: randomforest.hpp:209
RandomForestMetaData< float, float > RandomForestRegressorF
Definition: randomforest.hpp:208
CRITERION
Definition: algo_helper.h:20
RF_metrics set_rf_metrics_classification(float accuracy)
std::string get_rf_detailed_text(const RandomForestMetaData< T, L > *forest)
void preprocess_labels(int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO)
RF_params set_rf_params(int max_depth, int max_leaves, float max_features, int max_n_bins, int min_samples_leaf, int min_samples_split, float min_impurity_decrease, bool bootstrap, int n_trees, float max_samples, uint64_t seed, CRITERION split_criterion, int cfg_n_streams, int max_batch_size)
RF_metrics set_rf_metrics_regression(double mean_abs_error, double mean_squared_error, double median_abs_error)
ModelHandle concatenate_trees(std::vector< ModelHandle > treelite_handles)
RandomForestMetaData< double, int > RandomForestClassifierD
Definition: randomforest.hpp:142
RF_metrics score(const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const int *ref_labels, int n_rows, const int *predictions, int verbosity=CUML_LEVEL_INFO)
void compare_concat_forest_to_subforests(ModelHandle concat_tree_handle, std::vector< ModelHandle > treelite_handles)
task_category
Definition: randomforest.hpp:37
@ REGRESSION_MODEL
Definition: randomforest.hpp:37
@ CLASSIFICATION_MODEL
Definition: randomforest.hpp:37
void build_treelite_forest(ModelHandle *model, const RandomForestMetaData< T, L > *forest, int num_features)
Definition: dbscan.hpp:23
Definition: decisiontree.hpp:29
Definition: randomforest.hpp:39
RF_type rf_type
Definition: randomforest.hpp:40
double mean_squared_error
Definition: randomforest.hpp:47
double median_abs_error
Definition: randomforest.hpp:48
float accuracy
Definition: randomforest.hpp:43
double mean_abs_error
Definition: randomforest.hpp:46
Definition: randomforest.hpp:62
uint64_t seed
Definition: randomforest.hpp:88
int n_streams
Definition: randomforest.hpp:94
DT::DecisionTreeParams tree_params
Definition: randomforest.hpp:95
bool bootstrap
Definition: randomforest.hpp:77
int n_trees
Definition: randomforest.hpp:66
float max_samples
Definition: randomforest.hpp:81
Definition: randomforest.hpp:113
RF_params rf_params
Definition: randomforest.hpp:115
std::vector< std::shared_ptr< DT::TreeMetaDataNode< T, L > > > trees
Definition: randomforest.hpp:114
void * ModelHandle
Definition: treelite_defs.hpp:23