Cpp ML Library  1.0.0
A library of Machine Learning Algorithms seen in the Udemy course Machine Learning A to Z.
KNNClassifier.hpp
Go to the documentation of this file.
1 #ifndef KNN_CLASSIFIER_HPP
2 #define KNN_CLASSIFIER_HPP
3 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <stdexcept>
#include <unordered_map>
#include <utility>
#include <vector>
8 
19 public:
24  explicit KNNClassifier(int k = 3);
25 
30 
36  void fit(const std::vector<std::vector<double>>& X, const std::vector<int>& y);
37 
43  std::vector<int> predict(const std::vector<std::vector<double>>& X) const;
44 
45 private:
46  int k;
47  std::vector<std::vector<double>> X_train;
48  std::vector<int> y_train;
49 
56  double euclidean_distance(const std::vector<double>& a, const std::vector<double>& b) const;
57 
63  int predict_sample(const std::vector<double>& x) const;
64 };
65 
67 
69 
70 void KNNClassifier::fit(const std::vector<std::vector<double>>& X, const std::vector<int>& y) {
71  X_train = X;
72  y_train = y;
73 }
74 
75 std::vector<int> KNNClassifier::predict(const std::vector<std::vector<double>>& X) const {
76  std::vector<int> predictions;
77  predictions.reserve(X.size());
78  for (const auto& x : X) {
79  predictions.push_back(predict_sample(x));
80  }
81  return predictions;
82 }
83 
84 double KNNClassifier::euclidean_distance(const std::vector<double>& a, const std::vector<double>& b) const {
85  double distance = 0.0;
86  for (size_t i = 0; i < a.size(); ++i) {
87  double diff = a[i] - b[i];
88  distance += diff * diff;
89  }
90  return std::sqrt(distance);
91 }
92 
93 int KNNClassifier::predict_sample(const std::vector<double>& x) const {
94  // Vector to store distances and corresponding labels
95  std::vector<std::pair<double, int>> distances;
96  distances.reserve(X_train.size());
97 
98  // Compute distances to all training samples
99  for (size_t i = 0; i < X_train.size(); ++i) {
100  double dist = euclidean_distance(x, X_train[i]);
101  distances.emplace_back(dist, y_train[i]);
102  }
103 
104  // Sort distances
105  std::nth_element(distances.begin(), distances.begin() + k, distances.end(),
106  [](const std::pair<double, int>& a, const std::pair<double, int>& b) {
107  return a.first < b.first;
108  });
109 
110  // Get the labels of the k nearest neighbors
111  std::unordered_map<int, int> class_counts;
112  for (int i = 0; i < k; ++i) {
113  int label = distances[i].second;
114  class_counts[label]++;
115  }
116 
117  // Determine the majority class
118  int max_count = 0;
119  int majority_class = -1;
120  for (const auto& [label, count] : class_counts) {
121  if (count > max_count) {
122  max_count = count;
123  majority_class = label;
124  }
125  }
126 
127  return majority_class;
128 }
129 
130 #endif // KNN_CLASSIFIER_HPP
K-Nearest Neighbors Classifier for classification tasks.
Definition: KNNClassifier.hpp:18
KNNClassifier(int k=3)
Constructs a KNNClassifier.
Definition: KNNClassifier.hpp:66
void fit(const std::vector< std::vector< double >> &X, const std::vector< int > &y)
Fits the classifier to the training data.
Definition: KNNClassifier.hpp:70
std::vector< int > predict(const std::vector< std::vector< double >> &X) const
Predicts class labels for the given input data.
Definition: KNNClassifier.hpp:75
~KNNClassifier()
Destructor for KNNClassifier.
Definition: KNNClassifier.hpp:68