Cpp ML Library  1.0.0
A library of Machine Learning Algorithmns seen from the Udemy course Machine Learning A to Z.
DecisionTreeRegressor.hpp
Go to the documentation of this file.
1 #ifndef DECISION_TREE_REGRESSOR_HPP
2 #define DECISION_TREE_REGRESSOR_HPP
3 
4 #include <vector>
5 #include <algorithm>
6 #include <numeric>
7 #include <limits>
8 
19 public:
25  DecisionTreeRegressor(int max_depth = 5, int min_samples_split = 2);
26 
31 
37  void fit(const std::vector<std::vector<double>>& X, const std::vector<double>& y);
38 
44  std::vector<double> predict(const std::vector<std::vector<double>>& X) const;
45 
46 private:
47  struct Node {
48  bool is_leaf;
49  double value;
50  int feature_index;
51  double threshold;
52  Node* left;
53  Node* right;
54 
55  Node() : is_leaf(false), value(0.0), feature_index(-1), threshold(0.0), left(nullptr), right(nullptr) {}
56  };
57 
58  Node* root;
59  int max_depth;
60  int min_samples_split;
61 
62  Node* build_tree(const std::vector<std::vector<double>>& X, const std::vector<double>& y, int depth);
63  double calculate_mse(const std::vector<double>& y) const;
64  void split_dataset(const std::vector<std::vector<double>>& X, const std::vector<double>& y, int feature_index, double threshold,
65  std::vector<std::vector<double>>& X_left, std::vector<double>& y_left,
66  std::vector<std::vector<double>>& X_right, std::vector<double>& y_right) const;
67  double predict_sample(const std::vector<double>& x, Node* node) const;
68  void delete_tree(Node* node);
69 };
70 
71 DecisionTreeRegressor::DecisionTreeRegressor(int max_depth, int min_samples_split)
72  : root(nullptr), max_depth(max_depth), min_samples_split(min_samples_split) {}
73 
75  delete_tree(root);
76 }
77 
78 void DecisionTreeRegressor::fit(const std::vector<std::vector<double>>& X, const std::vector<double>& y) {
79  root = build_tree(X, y, 0);
80 }
81 
82 std::vector<double> DecisionTreeRegressor::predict(const std::vector<std::vector<double>>& X) const {
83  std::vector<double> predictions;
84  for (const auto& x : X) {
85  predictions.push_back(predict_sample(x, root));
86  }
87  return predictions;
88 }
89 
90 DecisionTreeRegressor::Node* DecisionTreeRegressor::build_tree(const std::vector<std::vector<double>>& X, const std::vector<double>& y, int depth) {
91  Node* node = new Node();
92 
93  // Check stopping criteria
94  if (depth >= max_depth || y.size() < min_samples_split) {
95  node->is_leaf = true;
96  node->value = std::accumulate(y.begin(), y.end(), 0.0) / y.size();
97  return node;
98  }
99 
100  double best_mse = std::numeric_limits<double>::max();
101  int best_feature_index = -1;
102  double best_threshold = 0.0;
103  std::vector<std::vector<double>> best_X_left, best_X_right;
104  std::vector<double> best_y_left, best_y_right;
105 
106  int num_features = X[0].size();
107  for (int feature_index = 0; feature_index < num_features; ++feature_index) {
108  // Get all possible thresholds
109  std::vector<double> feature_values;
110  for (const auto& x : X) {
111  feature_values.push_back(x[feature_index]);
112  }
113  std::sort(feature_values.begin(), feature_values.end());
114  std::vector<double> thresholds;
115  for (size_t i = 1; i < feature_values.size(); ++i) {
116  thresholds.push_back((feature_values[i - 1] + feature_values[i]) / 2.0);
117  }
118 
119  // Evaluate each threshold
120  for (double threshold : thresholds) {
121  std::vector<std::vector<double>> X_left, X_right;
122  std::vector<double> y_left, y_right;
123  split_dataset(X, y, feature_index, threshold, X_left, y_left, X_right, y_right);
124 
125  if (y_left.empty() || y_right.empty())
126  continue;
127 
128  double mse_left = calculate_mse(y_left);
129  double mse_right = calculate_mse(y_right);
130  double mse = (mse_left * y_left.size() + mse_right * y_right.size()) / y.size();
131 
132  if (mse < best_mse) {
133  best_mse = mse;
134  best_feature_index = feature_index;
135  best_threshold = threshold;
136  best_X_left = X_left;
137  best_X_right = X_right;
138  best_y_left = y_left;
139  best_y_right = y_right;
140  }
141  }
142  }
143 
144  // If no split improves the mse, make this a leaf node
145  if (best_feature_index == -1) {
146  node->is_leaf = true;
147  node->value = std::accumulate(y.begin(), y.end(), 0.0) / y.size();
148  return node;
149  }
150 
151  // Recursively build the left and right subtrees
152  node->feature_index = best_feature_index;
153  node->threshold = best_threshold;
154  node->left = build_tree(best_X_left, best_y_left, depth + 1);
155  node->right = build_tree(best_X_right, best_y_right, depth + 1);
156  return node;
157 }
158 
159 double DecisionTreeRegressor::calculate_mse(const std::vector<double>& y) const {
160  double mean = std::accumulate(y.begin(), y.end(), 0.0) / y.size();
161  double mse = 0.0;
162  for (double val : y) {
163  mse += (val - mean) * (val - mean);
164  }
165  return mse / y.size();
166 }
167 
168 void DecisionTreeRegressor::split_dataset(const std::vector<std::vector<double>>& X, const std::vector<double>& y,
169  int feature_index, double threshold,
170  std::vector<std::vector<double>>& X_left, std::vector<double>& y_left,
171  std::vector<std::vector<double>>& X_right, std::vector<double>& y_right) const {
172  for (size_t i = 0; i < X.size(); ++i) {
173  if (X[i][feature_index] <= threshold) {
174  X_left.push_back(X[i]);
175  y_left.push_back(y[i]);
176  } else {
177  X_right.push_back(X[i]);
178  y_right.push_back(y[i]);
179  }
180  }
181 }
182 
183 double DecisionTreeRegressor::predict_sample(const std::vector<double>& x, Node* node) const {
184  if (node->is_leaf) {
185  return node->value;
186  }
187  if (x[node->feature_index] <= node->threshold) {
188  return predict_sample(x, node->left);
189  } else {
190  return predict_sample(x, node->right);
191  }
192 }
193 
194 void DecisionTreeRegressor::delete_tree(Node* node) {
195  if (node != nullptr) {
196  delete_tree(node->left);
197  delete_tree(node->right);
198  delete node;
199  }
200 }
201 
202 #endif // DECISION_TREE_REGRESSOR_HPP
Implements a Decision Tree Regressor.
Definition: DecisionTreeRegressor.hpp:18
void fit(const std::vector< std::vector< double >> &X, const std::vector< double > &y)
Fits the model to the training data.
Definition: DecisionTreeRegressor.hpp:78
std::vector< double > predict(const std::vector< std::vector< double >> &X) const
Predicts target values for given input data.
Definition: DecisionTreeRegressor.hpp:82
DecisionTreeRegressor(int max_depth=5, int min_samples_split=2)
Constructs a DecisionTreeRegressor.
Definition: DecisionTreeRegressor.hpp:71
~DecisionTreeRegressor()
Destructor for DecisionTreeRegressor.
Definition: DecisionTreeRegressor.hpp:74