-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcpu-benchmark-mpi.cpp
101 lines (83 loc) · 3.52 KB
/
cpu-benchmark-mpi.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#include <iostream>
#include <vector>
#include <chrono>
#include <random>
#include <cmath>
#include <mpi.h>
// Edge length of the square matrices being multiplied. Every process allocates
// three MATRIX_SIZE x MATRIX_SIZE matrices of doubles (A, B and C in main).
const int MATRIX_SIZE = 2048; // Adjust this based on your system's memory
// Number of timed multiplications; main reports the average duration over
// these runs (one extra untimed warm-up multiplication is performed first).
const int NUM_ITERATIONS = 10;
// Function to initialize a matrix with random values
//
// Overwrites every element of `matrix` with a value drawn from a uniform
// real distribution over [0.0, 1.0). The generator is a std::mt19937 seeded
// from std::random_device, so the contents differ from call to call.
//
// Each row is filled according to its own length, so empty and ragged
// (non-rectangular) matrices are handled without reading or writing out of
// bounds. Elements are visited in row-major order.
void initializeMatrix(std::vector<std::vector<double>>& matrix) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 1.0);
    for (auto& row : matrix) {
        for (double& value : row) {
            value = dis(gen);
        }
    }
}
// Function to perform matrix multiplication
//
// Computes rows [start_row, end_row) of the product C = A * B with the
// straightforward triple loop; rows of C outside that range are not touched.
//
// The dimensions are taken from the operands instead of a file-level
// constant: the inner dimension is B.size() and the number of output columns
// is the length of B's first row. The caller must ensure that
//   - 0 <= start_row <= end_row <= A.size() and end_row <= C.size(),
//   - every row of A in the range has at least B.size() elements,
//   - every row of B has at least as many elements as its first row,
//   - every row of C in the range has at least that many elements.
// No bounds checks are performed inside the loops.
void matrixMultiply(const std::vector<std::vector<double>>& A,
                    const std::vector<std::vector<double>>& B,
                    std::vector<std::vector<double>>& C,
                    int start_row, int end_row) {
    const auto inner = B.size();
    const auto cols = B.empty() ? 0 : B.front().size();
    for (int i = start_row; i < end_row; ++i) {
        // Look up the row objects once per output row rather than per element.
        const std::vector<double>& a_row = A[i];
        std::vector<double>& c_row = C[i];
        for (std::vector<double>::size_type j = 0; j < cols; ++j) {
            // Accumulate in increasing k so the floating-point result does not
            // depend on anything but the operand values.
            double sum = 0.0;
            for (std::vector<double>::size_type k = 0; k < inner; ++k) {
                sum += a_row[k] * B[k][j];
            }
            c_row[j] = sum;
        }
    }
}
int main(int argc, char** argv) {
MPI_Init(&argc, &argv);
int world_size, world_rank;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
// Calculate the number of rows each process will handle
int rows_per_process = MATRIX_SIZE / world_size;
int start_row = world_rank * rows_per_process;
int end_row = (world_rank == world_size - 1) ? MATRIX_SIZE : start_row + rows_per_process;
// Allocate matrices
std::vector<std::vector<double>> A(MATRIX_SIZE, std::vector<double>(MATRIX_SIZE));
std::vector<std::vector<double>> B(MATRIX_SIZE, std::vector<double>(MATRIX_SIZE));
std::vector<std::vector<double>> C(MATRIX_SIZE, std::vector<double>(MATRIX_SIZE));
// Initialize matrices on the root process
if (world_rank == 0) {
initializeMatrix(A);
initializeMatrix(B);
}
// Broadcast matrices A and B to all processes
MPI_Bcast(A.data()->data(), MATRIX_SIZE * MATRIX_SIZE, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(B.data()->data(), MATRIX_SIZE * MATRIX_SIZE, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Warm-up run
matrixMultiply(A, B, C, start_row, end_row);
// Measure performance
double total_time = 0.0;
for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
MPI_Barrier(MPI_COMM_WORLD);
auto start = std::chrono::high_resolution_clock::now();
matrixMultiply(A, B, C, start_row, end_row);
MPI_Barrier(MPI_COMM_WORLD);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
total_time += diff.count();
}
// Gather results
MPI_Gather(C[start_row].data(), rows_per_process * MATRIX_SIZE, MPI_DOUBLE,
C.data()->data(), rows_per_process * MATRIX_SIZE, MPI_DOUBLE,
0, MPI_COMM_WORLD);
if (world_rank == 0) {
double average_time = total_time / NUM_ITERATIONS;
double gflops = (2.0 * MATRIX_SIZE * MATRIX_SIZE * MATRIX_SIZE) / (average_time * 1e9);
std::cout << "Matrix size: " << MATRIX_SIZE << "x" << MATRIX_SIZE << std::endl;
std::cout << "Number of processes: " << world_size << std::endl;
std::cout << "Average time per multiplication: " << average_time << " seconds" << std::endl;
std::cout << "Performance: " << gflops << " GFLOPS" << std::endl;
}
MPI_Finalize();
return 0;
}