1 #ifndef DECISION_TREE_REGRESSOR_HPP
2 #define DECISION_TREE_REGRESSOR_HPP
37 void fit(
const std::vector<std::vector<double>>& X,
const std::vector<double>& y);
44 std::vector<double>
predict(
const std::vector<std::vector<double>>& X)
const;
55 Node() : is_leaf(
false), value(0.0), feature_index(-1), threshold(0.0), left(
nullptr), right(
nullptr) {}
60 int min_samples_split;
62 Node* build_tree(
const std::vector<std::vector<double>>& X,
const std::vector<double>& y,
int depth);
63 double calculate_mse(
const std::vector<double>& y)
const;
64 void split_dataset(
const std::vector<std::vector<double>>& X,
const std::vector<double>& y,
int feature_index,
double threshold,
65 std::vector<std::vector<double>>& X_left, std::vector<double>& y_left,
66 std::vector<std::vector<double>>& X_right, std::vector<double>& y_right)
const;
67 double predict_sample(
const std::vector<double>& x, Node* node)
const;
68 void delete_tree(Node* node);
72 : root(nullptr), max_depth(max_depth), min_samples_split(min_samples_split) {}
79 root = build_tree(X, y, 0);
83 std::vector<double> predictions;
84 for (
const auto& x : X) {
85 predictions.push_back(predict_sample(x, root));
90 DecisionTreeRegressor::Node* DecisionTreeRegressor::build_tree(
const std::vector<std::vector<double>>& X,
const std::vector<double>& y,
int depth) {
91 Node* node =
new Node();
94 if (depth >= max_depth || y.size() < min_samples_split) {
96 node->value = std::accumulate(y.begin(), y.end(), 0.0) / y.size();
100 double best_mse = std::numeric_limits<double>::max();
101 int best_feature_index = -1;
102 double best_threshold = 0.0;
103 std::vector<std::vector<double>> best_X_left, best_X_right;
104 std::vector<double> best_y_left, best_y_right;
106 int num_features = X[0].size();
107 for (
int feature_index = 0; feature_index < num_features; ++feature_index) {
109 std::vector<double> feature_values;
110 for (
const auto& x : X) {
111 feature_values.push_back(x[feature_index]);
113 std::sort(feature_values.begin(), feature_values.end());
114 std::vector<double> thresholds;
115 for (
size_t i = 1; i < feature_values.size(); ++i) {
116 thresholds.push_back((feature_values[i - 1] + feature_values[i]) / 2.0);
120 for (
double threshold : thresholds) {
121 std::vector<std::vector<double>> X_left, X_right;
122 std::vector<double> y_left, y_right;
123 split_dataset(X, y, feature_index, threshold, X_left, y_left, X_right, y_right);
125 if (y_left.empty() || y_right.empty())
128 double mse_left = calculate_mse(y_left);
129 double mse_right = calculate_mse(y_right);
130 double mse = (mse_left * y_left.size() + mse_right * y_right.size()) / y.size();
132 if (mse < best_mse) {
134 best_feature_index = feature_index;
135 best_threshold = threshold;
136 best_X_left = X_left;
137 best_X_right = X_right;
138 best_y_left = y_left;
139 best_y_right = y_right;
145 if (best_feature_index == -1) {
146 node->is_leaf =
true;
147 node->value = std::accumulate(y.begin(), y.end(), 0.0) / y.size();
152 node->feature_index = best_feature_index;
153 node->threshold = best_threshold;
154 node->left = build_tree(best_X_left, best_y_left, depth + 1);
155 node->right = build_tree(best_X_right, best_y_right, depth + 1);
159 double DecisionTreeRegressor::calculate_mse(
const std::vector<double>& y)
const {
160 double mean = std::accumulate(y.begin(), y.end(), 0.0) / y.size();
162 for (
double val : y) {
163 mse += (val - mean) * (val - mean);
165 return mse / y.size();
168 void DecisionTreeRegressor::split_dataset(
const std::vector<std::vector<double>>& X,
const std::vector<double>& y,
169 int feature_index,
double threshold,
170 std::vector<std::vector<double>>& X_left, std::vector<double>& y_left,
171 std::vector<std::vector<double>>& X_right, std::vector<double>& y_right)
const {
172 for (
size_t i = 0; i < X.size(); ++i) {
173 if (X[i][feature_index] <= threshold) {
174 X_left.push_back(X[i]);
175 y_left.push_back(y[i]);
177 X_right.push_back(X[i]);
178 y_right.push_back(y[i]);
183 double DecisionTreeRegressor::predict_sample(
const std::vector<double>& x, Node* node)
const {
187 if (x[node->feature_index] <= node->threshold) {
188 return predict_sample(x, node->left);
190 return predict_sample(x, node->right);
194 void DecisionTreeRegressor::delete_tree(Node* node) {
195 if (node !=
nullptr) {
196 delete_tree(node->left);
197 delete_tree(node->right);
Implements a Decision Tree Regressor.
Definition: DecisionTreeRegressor.hpp:18
void fit(const std::vector< std::vector< double >> &X, const std::vector< double > &y)
Fits the model to the training data.
Definition: DecisionTreeRegressor.hpp:78
std::vector< double > predict(const std::vector< std::vector< double >> &X) const
Predicts target values for given input data.
Definition: DecisionTreeRegressor.hpp:82
DecisionTreeRegressor(int max_depth=5, int min_samples_split=2)
Constructs a DecisionTreeRegressor.
Definition: DecisionTreeRegressor.hpp:71
~DecisionTreeRegressor()
Destructor for DecisionTreeRegressor.
Definition: DecisionTreeRegressor.hpp:74