diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..f857814
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,6 @@
+{
+    "files.associations": {
+        "iosfwd": "cpp",
+        "vector": "cpp"
+    }
+}
\ No newline at end of file
diff --git a/include/activation/activation_functions.h b/include/activation/activation_functions.h
new file mode 100644
index 0000000..6f306ea
--- /dev/null
+++ b/include/activation/activation_functions.h
@@ -0,0 +1,34 @@
+#ifndef ACTIVATION_FUNCTIONS_H
+#define ACTIVATION_FUNCTIONS_H
+
+#include <vector>
+#include <cmath>
+
+namespace activation {
+
+    // Sigmoid activation function
+    double sigmoid(double x);
+
+    // Derivative of the sigmoid function
+    double sigmoid_derivative(double x);
+
+    // Tanh activation function
+    double tanh(double x);
+
+    // Derivative of the tanh function
+    double tanh_derivative(double x);
+
+    // ReLU activation function
+    double relu(double x);
+
+    // Derivative of the ReLU function
+    double relu_derivative(double x);
+
+    // Apply an activation function element-wise to a vector
+    // (defined in the .cpp; instantiated only for double (*)(double))
+    template <typename Func>
+    std::vector<double> apply(const std::vector<double>& inputs, Func func);
+
+} // namespace activation
+
+#endif // ACTIVATION_FUNCTIONS_H
diff --git a/include/algorithms/linear_regression.h b/include/algorithms/linear_regression.h
index 5caebf7..8548ee3 100644
--- a/include/algorithms/linear_regression.h
+++ b/include/algorithms/linear_regression.h
@@ -9,9 +9,9 @@ namespace algorithms {
 class LinearRegression {
  public:
     // Constructor
-    LinearRegression() : m_slope(0.0), m_intercept(0.0) {}
+    LinearRegression() : m_slope(0.0), m_intercept(0.0), m_learning_rate(0.01), m_iterations(1000) {}
 
-    // Fit the model to the training data
+    // Fit the model to the training data using gradient descent
     void fit(const std::vector<double>& x, const std::vector<double>& y);
 
     // Predict the output for a given input
@@ -21,12 +21,22 @@ class LinearRegression {
     double getSlope() const { return m_slope; }
     double getIntercept() const { return m_intercept; }
 
+    // Set learning rate and number of iterations
+    void setLearningRate(double lr) { m_learning_rate = lr; }
+    void setIterations(int it) { m_iterations = it; }
+
  private:
     double m_slope;
     double m_intercept;
+    double m_learning_rate;
+    int m_iterations;
 
     // Helper function to compute the mean of a vector
     double mean(const std::vector<double>& v) const;
+
+    // Helper functions for gradient descent
+    double computeCost(const std::vector<double>& x, const std::vector<double>& y) const;
+    void gradientDescent(const std::vector<double>& x, const std::vector<double>& y);
 };
 
 } // namespace algorithms
diff --git a/src/activation/activation_functions.cpp b/src/activation/activation_functions.cpp
new file mode 100644
index 0000000..319c4aa
--- /dev/null
+++ b/src/activation/activation_functions.cpp
@@ -0,0 +1,52 @@
+#include "activation/activation_functions.h"
+#include <algorithm> // for std::max
+
+namespace activation {
+
+// Sigmoid activation function
+double sigmoid(double x) {
+    return 1.0 / (1.0 + std::exp(-x));
+}
+
+// Derivative of the sigmoid function
+double sigmoid_derivative(double x) {
+    double sig = sigmoid(x);
+    return sig * (1.0 - sig);
+}
+
+// Tanh activation function
+double tanh(double x) {
+    return std::tanh(x);
+}
+
+// Derivative of the tanh function
+double tanh_derivative(double x) {
+    double tanh_x = tanh(x);
+    return 1.0 - tanh_x * tanh_x;
+}
+
+// ReLU activation function
+double relu(double x) {
+    return std::max(0.0, x);
+}
+
+// Derivative of the ReLU function
+double relu_derivative(double x) {
+    return (x > 0) ? 1.0 : 0.0;
+}
+
+// Apply an activation function element-wise to a vector
+template <typename Func>
+std::vector<double> apply(const std::vector<double>& inputs, Func func) {
+    std::vector<double> result;
+    result.reserve(inputs.size());
+    for (double input : inputs) {
+        result.push_back(func(input));
+    }
+    return result;
+}
+
+// Explicit template instantiation for plain function pointers
+template std::vector<double> apply(const std::vector<double>& inputs, double (*func)(double));
+
+} // namespace activation
diff --git a/src/algorithms/linear_regression.cpp b/src/algorithms/linear_regression.cpp
index 017d359..3119dc0 100644
--- a/src/algorithms/linear_regression.cpp
+++ b/src/algorithms/linear_regression.cpp
@@ -1,38 +1,64 @@
 #include "algorithms/linear_regression.h"
-#include <numeric> // for std::accumulate
+#include <numeric>
+#include <cmath>
 
 namespace algorithms {
 
-void LinearRegression::fit(const std::vector<double>& x, const std::vector<double>& y) {
-    if (x.size() != y.size() || x.empty()) {
-        throw std::invalid_argument("Input vectors must be of the same size and non-empty.");
+// Compute the mean of a vector
+double LinearRegression::mean(const std::vector<double>& v) const {
+    return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
+}
+
+// Compute the cost: half the mean squared error (the 1/2 cancels in the gradient)
+double LinearRegression::computeCost(const std::vector<double>& x, const std::vector<double>& y) const {
+    double total_error = 0.0;
+    size_t n = x.size();
+    for (size_t i = 0; i < n; ++i) {
+        double prediction = m_slope * x[i] + m_intercept;
+        double error = prediction - y[i];
+        total_error += error * error;
     }
+    return total_error / (2 * n);
+}
 
-    double x_mean = mean(x);
-    double y_mean = mean(y);
+// Perform gradient descent to optimize slope and intercept
+void LinearRegression::gradientDescent(const std::vector<double>& x, const std::vector<double>& y) {
+    size_t n = x.size();
+    for (int i = 0; i < m_iterations; ++i) {
+        double slope_gradient = 0.0;
+        double intercept_gradient = 0.0;
+        for (size_t j = 0; j < n; ++j) {
+            double prediction = m_slope * x[j] + m_intercept;
+            double error = prediction - y[j];
+            slope_gradient += error * x[j];
+            intercept_gradient += error;
+        }
+        slope_gradient /= n;
+        intercept_gradient /= n;
 
-    double numerator = 0.0;
-    double denominator = 0.0;
+        m_slope -= m_learning_rate * slope_gradient;
+        m_intercept -= m_learning_rate * intercept_gradient;
 
-    for (size_t i = 0; i < x.size(); ++i) {
-        numerator += (x[i] - x_mean) * (y[i] - y_mean);
-        denominator += (x[i] - x_mean) * (x[i] - x_mean);
+        // Optional: report the cost every 100 iterations.
+        // Uncomment (and #include <iostream>) to watch convergence:
+        // if (i % 100 == 0) {
+        //     std::cout << "Iteration " << i << ": Cost "
+        //               << computeCost(x, y) << std::endl;
+        // }
     }
+}
 
-    if (denominator == 0.0) {
-        throw std::runtime_error("Denominator in slope calculation is zero.");
+// Fit the model using gradient descent
+void LinearRegression::fit(const std::vector<double>& x, const std::vector<double>& y) {
+    if (x.size() != y.size() || x.empty()) {
+        throw std::invalid_argument("Input vectors must have the same size and must not be empty.");
     }
-
-    m_slope = numerator / denominator;
-    m_intercept = y_mean - m_slope * x_mean;
+    gradientDescent(x, y);
 }
 
+// Predict the output for a given input
 double LinearRegression::predict(double x) const {
     return m_slope * x + m_intercept;
 }
 
-double LinearRegression::mean(const std::vector<double>& v) const {
-    return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
-}
-
 } // namespace algorithms
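
For reference, a minimal driver showing how the changed API fits together. This is a sketch, not part of the diff: the file name example_usage.cpp, the sample data, and the hyperparameter choices are illustrative assumptions, and it presumes include/ is on the compiler's include path with both new .cpp files compiled and linked.

// example_usage.cpp -- illustrative only, not part of the diff above.
#include <iostream>
#include <vector>

#include "activation/activation_functions.h"
#include "algorithms/linear_regression.h"

int main() {
    // Noiseless points on y = 2x + 1.
    std::vector<double> x = {1.0, 2.0, 3.0, 4.0, 5.0};
    std::vector<double> y = {3.0, 5.0, 7.0, 9.0, 11.0};

    algorithms::LinearRegression model;
    model.setLearningRate(0.01);  // header defaults: lr = 0.01, 1000 iterations
    model.setIterations(5000);    // extra iterations tighten the fit
    model.fit(x, y);

    std::cout << "slope ~ " << model.getSlope()
              << ", intercept ~ " << model.getIntercept()
              << ", predict(6) ~ " << model.predict(6.0) << "\n";

    // Element-wise activation; the explicit instantiation covers
    // plain function pointers such as &activation::sigmoid.
    std::vector<double> a = activation::apply(x, &activation::sigmoid);
    for (double v : a) {
        std::cout << v << ' ';
    }
    std::cout << '\n';
    return 0;
}

Note that apply is only explicitly instantiated for double (*)(double), so passing a lambda or functor would compile against the header but fail at link time; moving the template definition into the header would lift that restriction.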