Cpp ML Library  1.0.0
A library of Machine Learning algorithms from the Udemy course Machine Learning A to Z.
NeuralNetwork.hpp
#ifndef NEURAL_NETWORK_HPP
#define NEURAL_NETWORK_HPP

#include <vector>
#include <cmath>
#include <cstdlib>
#include <cassert>
#include <iostream>

/// Represents a connection between neurons with a weight and a change in weight.
struct Connection {
    double weight;      ///< The weight of the connection.
    double deltaWeight; ///< The change in weight (used for momentum).
};

/// Represents a single neuron in the neural network.
class Neuron {
public:
    /// Constructs a Neuron with one connection per neuron in the next layer.
    Neuron(unsigned numOutputs, unsigned index);

    /// Sets the output value of the neuron.
    void setOutputVal(double val);

    /// Gets the output value of the neuron.
    double getOutputVal() const;

    /// Feeds the outputs of the previous layer forward into this neuron.
    void feedForward(const std::vector<Neuron>& prevLayer);

    /// Calculates the gradient for an output-layer neuron.
    void calcOutputGradients(double targetVal);

    /// Calculates the gradient for a hidden-layer neuron.
    void calcHiddenGradients(const std::vector<Neuron>& nextLayer);

    /// Updates the weights of the connections feeding this neuron.
    void updateInputWeights(std::vector<Neuron>& prevLayer);

private:
    /// Returns a random weight in [0, 1].
    static double randomWeight();

    /// The activation function (tanh).
    static double activationFunction(double x);

    /// Derivative of the activation function, in terms of the neuron's output.
    static double activationFunctionDerivative(double x);

    /// Sums this neuron's contributions to the errors of the next layer.
    double sumDOW(const std::vector<Neuron>& nextLayer) const;

    double m_outputVal;
    std::vector<Connection> m_outputWeights;
    unsigned m_myIndex;
    double m_gradient;

    // Hyperparameters
    static double eta;   ///< Learning rate
    static double alpha; ///< Momentum
};

// Initialize static members
double Neuron::eta = 0.15;  // Learning rate
double Neuron::alpha = 0.5; // Momentum

Neuron::Neuron(unsigned numOutputs, unsigned index)
    : m_outputVal(0.0), m_myIndex(index), m_gradient(0.0)
{
    for (unsigned c = 0; c < numOutputs; ++c) {
        Connection conn;
        conn.weight = randomWeight();
        conn.deltaWeight = 0.0;
        m_outputWeights.push_back(conn);
    }
}

void Neuron::setOutputVal(double val) {
    m_outputVal = val;
}

double Neuron::getOutputVal() const {
    return m_outputVal;
}

void Neuron::feedForward(const std::vector<Neuron>& prevLayer) {
    double sum = 0.0;

    // Sum the previous layer's outputs (which are our inputs),
    // including the bias node from the previous layer.
    for (size_t n = 0; n < prevLayer.size(); ++n) {
        sum += prevLayer[n].getOutputVal() * prevLayer[n].m_outputWeights[m_myIndex].weight;
    }

    m_outputVal = Neuron::activationFunction(sum);
}

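// In symbols, with y_i the outputs of the previous layer (bias included) and
// w_{i,j} the weight of the connection from neuron i to this neuron j,
// feedForward computes:
//
//   y_j = tanh( sum_i  y_i * w_{i,j} )
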
void Neuron::calcOutputGradients(double targetVal) {
    double delta = targetVal - m_outputVal;
    m_gradient = delta * Neuron::activationFunctionDerivative(m_outputVal);
}

void Neuron::calcHiddenGradients(const std::vector<Neuron>& nextLayer) {
    double dow = sumDOW(nextLayer);
    m_gradient = dow * Neuron::activationFunctionDerivative(m_outputVal);
}

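// Gradient formulas implemented above, for a neuron with output y:
//
//   output layer:  gradient = (target - y) * f'(y)
//   hidden layer:  gradient = ( sum_k w_k * gradient_k ) * f'(y)
//
// where f'(y) = 1 - y^2 is the tanh derivative expressed in terms of the
// output, and the sum runs over the next layer's non-bias neurons (sumDOW).
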
void Neuron::updateInputWeights(std::vector<Neuron>& prevLayer) {
    // Update the weights stored in the previous layer's neurons
    for (size_t n = 0; n < prevLayer.size(); ++n) {
        Neuron& neuron = prevLayer[n];
        double oldDeltaWeight = neuron.m_outputWeights[m_myIndex].deltaWeight;

        double newDeltaWeight =
            // Individual input, magnified by the gradient and learning rate:
            eta * neuron.getOutputVal() * m_gradient
            // Plus momentum: a fraction of the previous delta weight
            + alpha * oldDeltaWeight;

        neuron.m_outputWeights[m_myIndex].deltaWeight = newDeltaWeight;
        neuron.m_outputWeights[m_myIndex].weight += newDeltaWeight;
    }
}

double Neuron::randomWeight() {
    // Random weight in [0, 1]; note that rand() is not seeded here.
    return rand() / double(RAND_MAX);
}

double Neuron::activationFunction(double x) {
    // Hyperbolic tangent activation function, output range (-1, 1)
    return tanh(x);
}

double Neuron::activationFunctionDerivative(double x) {
    // Derivative of tanh, where x is the neuron's *output* y = tanh(sum):
    // d/dsum tanh(sum) = 1 - tanh^2(sum) = 1 - y^2
    return 1.0 - x * x;
}

double Neuron::sumDOW(const std::vector<Neuron>& nextLayer) const {
    double sum = 0.0;

    // Sum our contributions to the errors at the nodes we feed
    // (excluding the next layer's bias neuron)
    for (size_t n = 0; n < nextLayer.size() - 1; ++n) {
        sum += m_outputWeights[n].weight * nextLayer[n].m_gradient;
    }

    return sum;
}

/// Represents the neural network consisting of layers of neurons.
class NeuralNetwork {
public:
    /// Constructs a NeuralNetwork with the given topology
    /// (number of neurons per layer, from input to output).
    NeuralNetwork(const std::vector<unsigned>& topology);

    /// Feeds the input values forward through the network.
    void feedForward(const std::vector<double>& inputVals);

    /// Performs backpropagation to adjust the weights.
    void backProp(const std::vector<double>& targetVals);

    /// Gets the results from the output layer.
    void getResults(std::vector<double>& resultVals) const;

    /// Gets the recent average error of the network.
    double getRecentAverageError() const;

private:
    std::vector<std::vector<Neuron>> m_layers;
    double m_error;
    double m_recentAverageError;
    static double m_recentAverageSmoothingFactor;
};

// Initialize static members
double NeuralNetwork::m_recentAverageSmoothingFactor = 100.0;

NeuralNetwork::NeuralNetwork(const std::vector<unsigned>& topology)
    : m_error(0.0), m_recentAverageError(0.0)
{
    size_t numLayers = topology.size();
    for (size_t layerNum = 0; layerNum < numLayers; ++layerNum) {
        m_layers.push_back(std::vector<Neuron>());
        unsigned numOutputs = (layerNum == topology.size() - 1) ? 0 : topology[layerNum + 1];

        // Add neurons to the layer, plus one extra bias neuron
        for (unsigned neuronNum = 0; neuronNum <= topology[layerNum]; ++neuronNum) {
            m_layers.back().push_back(Neuron(numOutputs, neuronNum));
        }

        // Force the bias node's output value to 1.0
        m_layers.back().back().setOutputVal(1.0);
    }
}

void NeuralNetwork::feedForward(const std::vector<double>& inputVals) {
    assert(inputVals.size() == m_layers[0].size() - 1);

    // Assign the input values to the input neurons
    for (size_t i = 0; i < inputVals.size(); ++i) {
        m_layers[0][i].setOutputVal(inputVals[i]);
    }

    // Forward propagation, layer by layer (skipping each layer's bias neuron)
    for (size_t layerNum = 1; layerNum < m_layers.size(); ++layerNum) {
        std::vector<Neuron>& prevLayer = m_layers[layerNum - 1];
        for (size_t n = 0; n < m_layers[layerNum].size() - 1; ++n) {
            m_layers[layerNum][n].feedForward(prevLayer);
        }
    }
}

void NeuralNetwork::backProp(const std::vector<double>& targetVals) {
    // Calculate overall net error (RMS of output neuron errors)
    std::vector<Neuron>& outputLayer = m_layers.back();
    m_error = 0.0;

    for (size_t n = 0; n < outputLayer.size() - 1; ++n) {
        double delta = targetVals[n] - outputLayer[n].getOutputVal();
        m_error += delta * delta;
    }
    m_error /= outputLayer.size() - 1; // Average squared error
    m_error = sqrt(m_error);           // RMS

    // Keep a recent average error measurement
    m_recentAverageError =
        (m_recentAverageError * m_recentAverageSmoothingFactor + m_error)
        / (m_recentAverageSmoothingFactor + 1.0);

    // Calculate output layer gradients
    for (size_t n = 0; n < outputLayer.size() - 1; ++n) {
        outputLayer[n].calcOutputGradients(targetVals[n]);
    }

    // Calculate gradients on hidden layers, from the last hidden layer backwards
    for (size_t layerNum = m_layers.size() - 2; layerNum > 0; --layerNum) {
        std::vector<Neuron>& hiddenLayer = m_layers[layerNum];
        std::vector<Neuron>& nextLayer = m_layers[layerNum + 1];

        for (size_t n = 0; n < hiddenLayer.size(); ++n) {
            hiddenLayer[n].calcHiddenGradients(nextLayer);
        }
    }

    // Update connection weights for all layers, from the output layer
    // back to the first hidden layer
    for (size_t layerNum = m_layers.size() - 1; layerNum > 0; --layerNum) {
        std::vector<Neuron>& layer = m_layers[layerNum];
        std::vector<Neuron>& prevLayer = m_layers[layerNum - 1];

        for (size_t n = 0; n < layer.size() - 1; ++n) {
            layer[n].updateInputWeights(prevLayer);
        }
    }
}

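// The "recent average error" above is a running average with smoothing
// factor N = m_recentAverageSmoothingFactor:
//
//   recentAvg <- (recentAvg * N + error) / (N + 1)
//
// so each backProp call nudges the average toward the latest RMS error.
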
void NeuralNetwork::getResults(std::vector<double>& resultVals) const {
    resultVals.clear();
    const std::vector<Neuron>& outputLayer = m_layers.back();
    for (size_t n = 0; n < outputLayer.size() - 1; ++n) {
        resultVals.push_back(outputLayer[n].getOutputVal());
    }
}

double NeuralNetwork::getRecentAverageError() const {
    return m_recentAverageError;
}

#endif // NEURAL_NETWORK_HPP
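
As a quick illustration of the API above, here is a minimal usage sketch that trains the network on XOR. The topology {2, 4, 1} and the epoch count are illustrative choices, not part of the library:

// Hypothetical usage example (not part of NeuralNetwork.hpp)
#include "NeuralNetwork.hpp"
#include <iostream>
#include <vector>

int main() {
    // 2 inputs, one hidden layer of 4 neurons, 1 output
    NeuralNetwork net(std::vector<unsigned>{2, 4, 1});

    // Train on the four XOR patterns for a fixed number of epochs
    for (int epoch = 0; epoch < 2000; ++epoch) {
        for (int a = 0; a <= 1; ++a) {
            for (int b = 0; b <= 1; ++b) {
                net.feedForward({double(a), double(b)});
                net.backProp({double(a ^ b)});
            }
        }
    }

    // Query the trained network
    std::vector<double> results;
    net.feedForward({1.0, 0.0});
    net.getResults(results);
    std::cout << "1 XOR 0 ~ " << results[0] << "\n";
    std::cout << "recent average error: " << net.getRecentAverageError() << "\n";
    return 0;
}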