-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
66f6a1b
commit 2d82d91
Showing
5 changed files
with
246 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#include <iostream> | ||
#include <vector> | ||
#include "algorithms/kmeans.h" | ||
|
||
int main() { | ||
// Define data points (each point is 1D in this example) | ||
std::vector<std::vector<double>> data = { | ||
{0.5}, {-0.3}, {0.8}, {-1.2}, {0.0}, {2.5}, {3.0}, {2.8}, {-2.0}, {1.5} | ||
}; | ||
|
||
// Instantiate the KMeans class with 2 clusters | ||
algorithms::KMeans kmeans(2); | ||
|
||
// Fit the KMeans algorithm to the data | ||
kmeans.fit(data); | ||
|
||
// Retrieve cluster labels and centroids | ||
std::vector<int> labels = kmeans.getLabels(); | ||
std::vector<std::vector<double>> centroids = kmeans.getCentroids(); | ||
|
||
// Print cluster labels for each data point | ||
std::cout << "KMeans Labels:" << std::endl; | ||
for (size_t i = 0; i < labels.size(); i++) { | ||
std::cout << "Data Point " << i << " is in Cluster " << labels[i] << std::endl; | ||
} | ||
|
||
// Print the centroids of each cluster | ||
std::cout << "Cluster Centroids:" << std::endl; | ||
for (size_t i = 0; i < centroids.size(); i++) { | ||
std::cout << "Centroid " << i << ": " << centroids[i][0] << std::endl; | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#ifndef KMEANS_H | ||
#define KMEANS_H | ||
|
||
#include <vector> | ||
|
||
namespace algorithms{ | ||
|
||
class KMeans { | ||
public: | ||
// Constructor | ||
KMeans(int k, int maxIterations = 100); | ||
|
||
// Fit the model to the data | ||
void fit(const std::vector<std::vector<double>>& data); | ||
|
||
// Get cluster labels | ||
const std::vector<int>& getLabels() const; | ||
|
||
// Get cluster centers | ||
const std::vector<std::vector<double>>& getCentroids() const; | ||
|
||
private: | ||
int k; // Number of clusters | ||
int maxIterations; // Maximum number of iterations | ||
std::vector<std::vector<double>> centroids; // Cluster centroids | ||
std::vector<int> labels; // Cluster labels for each point | ||
|
||
// Randomly initialize centroids from the data points | ||
void initializeCentroids(const std::vector<std::vector<double>>& data, int dimensions); | ||
|
||
// Assign each point to the nearest centroid | ||
void assignClusters(const std::vector<std::vector<double>>& data); | ||
|
||
// Update centroids based on the mean of the assigned points | ||
void updateCentroids(const std::vector<std::vector<double>>& data, int dimensions); | ||
|
||
// Calculate Euclidean distance between two points | ||
double euclideanDistance(const std::vector<double>& p1, const std::vector<double>& p2) const; | ||
|
||
// Check if the centroids have converged (optional, currently fixed iterations) | ||
bool converged() const; | ||
}; | ||
|
||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#ifndef LINEAR_REGRESSION_H | ||
#define LINEAR_REGRESSION_H | ||
|
||
#include <vector> | ||
#include <stdexcept> | ||
|
||
namespace algorithms { | ||
|
||
class LinearRegression { | ||
public: | ||
// Constructor | ||
LinearRegression() : m_slope(0.0), m_intercept(0.0), m_learning_rate(0.01), m_iterations(1000) {} | ||
|
||
// Fit the model to the training data using gradient descent | ||
void fit(const std::vector<double>& x, const std::vector<double>& y); | ||
|
||
// Predict the output for a given input | ||
double predict(double x) const; | ||
|
||
// Getters for the parameters | ||
double getSlope() const { return m_slope; } | ||
double getIntercept() const { return m_intercept; } | ||
|
||
// Set learning rate and number of iterations | ||
void setLearningRate(double lr) { m_learning_rate = lr; } | ||
void setIterations(int it) { m_iterations = it; } | ||
|
||
private: | ||
double m_slope; | ||
double m_intercept; | ||
double m_learning_rate; | ||
int m_iterations; | ||
|
||
// Helper function to compute the mean of a vector | ||
double mean(const std::vector<double>& v) const; | ||
|
||
// Helper functions for gradient descent | ||
double computeCost(const std::vector<double>& x, const std::vector<double>& y) const; | ||
void gradientDescent(const std::vector<double>& x, const std::vector<double>& y); | ||
}; | ||
|
||
} // namespace algorithms | ||
|
||
#endif // LINEAR_REGRESSION_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#include "algorithms/kmeans.h" | ||
#include <iostream> | ||
#include <cmath> | ||
#include <limits> | ||
#include <random> | ||
|
||
|
||
namespace algorithms{ | ||
|
||
// Constructor | ||
KMeans::KMeans(int k, int maxIterations) : k(k), maxIterations(maxIterations) {} | ||
|
||
// Fit the model to the data | ||
void KMeans::fit(const std::vector<std::vector<double>>& data) { | ||
int numPoints = data.size(); | ||
int dimensions = data[0].size(); | ||
|
||
// Randomly initialize centroids | ||
initializeCentroids(data, dimensions); | ||
|
||
// Run the algorithm for a fixed number of iterations | ||
for (int iteration = 0; iteration < maxIterations; iteration++) { | ||
// Step 1: Assign points to the closest centroid | ||
assignClusters(data); | ||
|
||
// Step 2: Update centroids based on the points assigned to them | ||
updateCentroids(data, dimensions); | ||
|
||
// Check for convergence (not implemented for simplicity, fixed iterations) | ||
if (converged()) { | ||
break; | ||
} | ||
} | ||
} | ||
|
||
// Get cluster labels | ||
const std::vector<int>& KMeans::getLabels() const { | ||
return labels; | ||
} | ||
|
||
// Get cluster centers | ||
const std::vector<std::vector<double>>& KMeans::getCentroids() const { | ||
return centroids; | ||
} | ||
|
||
// Randomly initialize centroids from the data points | ||
void KMeans::initializeCentroids(const std::vector<std::vector<double>>& data, int dimensions) { | ||
centroids.resize(k, std::vector<double>(dimensions)); | ||
std::random_device rd; | ||
std::mt19937 gen(rd()); | ||
std::uniform_int_distribution<> dis(0, data.size() - 1); | ||
|
||
for (int i = 0; i < k; i++) { | ||
centroids[i] = data[dis(gen)]; | ||
} | ||
} | ||
|
||
// Assign each point to the nearest centroid | ||
void KMeans::assignClusters(const std::vector<std::vector<double>>& data) { | ||
labels.resize(data.size()); | ||
for (size_t i = 0; i < data.size(); i++) { | ||
double minDist = std::numeric_limits<double>::max(); | ||
int closestCentroid = 0; | ||
for (int j = 0; j < k; j++) { | ||
double dist = euclideanDistance(data[i], centroids[j]); | ||
if (dist < minDist) { | ||
minDist = dist; | ||
closestCentroid = j; | ||
} | ||
} | ||
labels[i] = closestCentroid; | ||
} | ||
} | ||
|
||
// Update centroids based on the mean of the assigned points | ||
void KMeans::updateCentroids(const std::vector<std::vector<double>>& data, int dimensions) { | ||
std::vector<std::vector<double>> newCentroids(k, std::vector<double>(dimensions, 0.0)); | ||
std::vector<int> pointsPerCentroid(k, 0); | ||
|
||
// Sum the points assigned to each centroid | ||
for (size_t i = 0; i < data.size(); i++) { | ||
int centroidIndex = labels[i]; | ||
pointsPerCentroid[centroidIndex]++; | ||
for (int d = 0; d < dimensions; d++) { | ||
newCentroids[centroidIndex][d] += data[i][d]; | ||
} | ||
} | ||
|
||
// Update the centroids by computing the average | ||
for (int j = 0; j < k; j++) { | ||
if (pointsPerCentroid[j] > 0) { | ||
for (int d = 0; d < dimensions; d++) { | ||
newCentroids[j][d] /= pointsPerCentroid[j]; | ||
} | ||
} | ||
} | ||
|
||
centroids = newCentroids; | ||
} | ||
|
||
// Calculate Euclidean distance between two points | ||
double KMeans::euclideanDistance(const std::vector<double>& p1, const std::vector<double>& p2) const { | ||
double sum = 0.0; | ||
for (size_t i = 0; i < p1.size(); i++) { | ||
sum += std::pow(p1[i] - p2[i], 2); | ||
} | ||
return std::sqrt(sum); | ||
} | ||
|
||
// Check if the centroids have converged (optional, currently fixed iterations) | ||
bool KMeans::converged() const { | ||
// This can be implemented with a threshold to check if centroids have stopped moving | ||
return false; | ||
} | ||
|
||
} |