Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update with the file we used for benchmarking #1889

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions enzyme/test/Integration/Sparse/eigen_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ static T eigenstuffM(const T *__restrict__ pos0, size_t n, const int *__restrict
// Calculate total energy for all faces in 3D
template<typename T>
__attribute__((always_inline))
static T eigenstuffL(const T *__restrict__ x, size_t num_faces, const int *__restrict__ faces, const T *__restrict__ pos0) {
static T eigenstuffL(const T *__restrict__ pos0, size_t num_faces, const int *__restrict__ faces, const T *__restrict__ x) {
T sum = 0;
__builtin_assume(num_faces != 0);
for (size_t idx=0; idx<num_faces; idx++) {
Expand Down Expand Up @@ -143,7 +143,7 @@ template<typename T>
__attribute__((always_inline))
static void gradient_ip(const T *__restrict__ pos0, const size_t num_faces, const int* faces, const T *__restrict__ x, T *__restrict__ out)
{
__enzyme_autodiff<void>((void *)eigenstuffM<T>,
__enzyme_autodiff<void>((void *)eigenstuffL<T>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a test can you also have a function which differentiates the other function as well

enzyme_const, pos0,
enzyme_const, num_faces,
enzyme_const, faces,
Expand Down Expand Up @@ -206,10 +206,10 @@ int main(int argc, char** argv) {

// Call eigenstuffM_simple
struct timeval start, end;
gettimeofday(&start, NULL);
const float resultM = eigenstuffM(pos0, num_faces, faces, x);
gettimeofday(&end, NULL);
printf("Result for eigenstuffM_simple: %f, runtime:%f\n", resultM, tdiff(&start, &end));
// gettimeofday(&start, NULL);
// const float resultM = eigenstuffM(pos0, num_faces, faces, x);
// gettimeofday(&end, NULL);
// printf("Result for eigenstuffM_simple: %f, runtime:%f\n", resultM, tdiff(&start, &end));

// Call eigenstuffL_simple
gettimeofday(&start, NULL);
Expand All @@ -222,10 +222,10 @@ int main(int argc, char** argv) {
dx[i] = 0;
gradient_ip(pos0, num_faces, faces, x, dx);

if (x_pts < 30) {
for (size_t i=0; i<sizeof(dx)/sizeof(dx[0]); i++)
printf("eigenstuffM grad_vert[%zu]=%f\n", i, dx[i]);
}
// if (x_pts < 30) {
// for (size_t i=0; i<sizeof(dx)/sizeof(dx[0]); i++)
// printf("eigenstuffM grad_vert[%zu]=%f\n", i, dx[i]);
// }

gettimeofday(&start, NULL);
auto hess_x = hessian(pos0, num_faces, faces, x, x_pts);
Expand All @@ -235,10 +235,10 @@ int main(int argc, char** argv) {

printf("Runtime %0.6f\n", tdiff(&start, &end));

if (x_pts <= 8)
for (auto &hess : hess_x) {
printf("i=%lu, j=%lu, val=%f\n", hess.row, hess.col, hess.val);
}
// if (x_pts <= 8)
// for (auto &hess : hess_x) {
// printf("i=%lu, j=%lu, val=%f\n", hess.row, hess.col, hess.val);
// }

return 0;
}
Expand Down
52 changes: 45 additions & 7 deletions enzyme/test/Integration/Sparse/matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct Triple {
};

__attribute__((enzyme_sparse_accumulate))
static void inner_storeflt(int64_t row, int64_t col, float val, std::vector<Triple<float>> &triplets) {
static void inner_storeflt(size_t row, size_t col, float val, std::vector<Triple<float>> &triplets) {
#ifdef BENCHMARK
if (val == 0.0) return;
#else
Expand All @@ -26,9 +26,9 @@ static void inner_storeflt(int64_t row, int64_t col, float val, std::vector<Trip
}

__attribute__((enzyme_sparse_accumulate))
static void inner_storedbl(int64_t row, int64_t col, double val, std::vector<Triple<double>> &triplets) {
static void inner_storedbl(size_t row, size_t col, double val, std::vector<Triple<double>> &triplets) {
#ifdef BENCHMARK
if (val == 0.0) return;
if (val > -1e-10 && val < 1e-10) return;
#else
#warning "Compiling for debug/verfication, performance may be slowed"
#endif
Expand All @@ -37,7 +37,7 @@ static void inner_storedbl(int64_t row, int64_t col, double val, std::vector<Tri

template<typename T>
__attribute__((always_inline))
static void sparse_store(T val, int64_t idx, size_t i, std::vector<Triple<T>> &triplets) {
static void sparse_store(T val, size_t idx, size_t i, std::vector<Triple<T>> &triplets) {
if (val == 0.0) return;
idx /= sizeof(T);
if constexpr (sizeof(T) == 4)
Expand All @@ -48,19 +48,57 @@ static void sparse_store(T val, int64_t idx, size_t i, std::vector<Triple<T>> &t

template<typename T>
__attribute__((always_inline))
static T sparse_load(int64_t idx, size_t i, std::vector<Triple<T>> &triplets) {
static T sparse_load(size_t idx, size_t i, std::vector<Triple<T>> &triplets) {
return 0.0;
}


__attribute__((enzyme_sparse_accumulate))
static void inner_storeflt_modn(size_t row, size_t col, float val, size_t N, std::vector<Triple<float>> &triplets) {
#ifdef BENCHMARK
if (val == 0.0) return;
#else
#warning "Compiling for debug/verfication, performance may be slowed"
#endif
triplets.emplace_back(row, col % N, val);
}

__attribute__((enzyme_sparse_accumulate))
static void inner_storedbl_modn(size_t row, size_t col, double val, size_t N, std::vector<Triple<double>> &triplets) {
#ifdef BENCHMARK
if (val > -1e-10 && val < 1e-10) return;
#else
#warning "Compiling for debug/verfication, performance may be slowed"
#endif
triplets.emplace_back(row, col % N, val);
}

template<typename T>
__attribute__((always_inline))
static void sparse_store_modn(T val, size_t idx, size_t i, size_t N, std::vector<Triple<T>> &triplets) {
if (val == 0.0) return;
idx /= sizeof(T);
if constexpr (sizeof(T) == 4)
inner_storeflt_modn(i, idx, val, N, triplets);
else
inner_storedbl_modn(i, idx, val, N, triplets);
}

template<typename T>
__attribute__((always_inline))
static T sparse_load_modn(int64_t idx, size_t i, size_t N, std::vector<Triple<T>> &triplets) {
return 0.0;
}

template<typename T>
__attribute__((always_inline))
static void ident_store(T, int64_t idx, size_t i) {
static void ident_store(T, size_t idx, size_t i) {
assert(0 && "should never load");
}

template<typename T>
__attribute__((always_inline))
static T ident_load(int64_t idx, size_t i) {
static T ident_load(size_t idx, size_t i) {
idx /= sizeof(T);
return (T)(idx == i);// ? 1.0 : 0.0;
}
Expand Down
84 changes: 77 additions & 7 deletions enzyme/test/Integration/Sparse/ringspring3Drestlengthone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
// TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O2 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi
// TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi

// /usr/local/Cellar/llvm@15/15.0.7/bin/clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 /Users/jessemichel/research/sparse_project/benchmark_enzyme/Enzyme/enzyme/test/Integration/Sparse/ringspring3Drestlengthone.cpp -fpass-plugin=/Users/jessemichel/research/sparse_project/benchmark_enzyme/Enzyme/enzyme/build15D/Enzyme/ClangEnzyme-15.dylib -Xclang -load -Xclang /Users/jessemichel/research/sparse_project/benchmark_enzyme/Enzyme/enzyme/build15D/Enzyme/ClangEnzyme-15.dylib -mllvm -enzyme-auto-sparsity=1 -o ringspring3Drestlengthone.out


#include <stdio.h>
#include <assert.h>
#include <vector>
Expand All @@ -21,7 +24,7 @@ __attribute__((always_inline))
static double f(size_t N, T* __restrict__ pos) {
double e = 0.;
__builtin_assume(N != 0);
for (size_t j = 0; j < N/3; j ++) {
for (size_t j = 0; j < N; j ++) {
size_t i = 3 * j;
T vx = pos[i];
T vy = pos[i + 1];
Expand Down Expand Up @@ -49,22 +52,23 @@ static void never_store(T val, int64_t idx, T* input, size_t N) {
assert(0 && "this is a read only input, why are you storing here...");
}

template<typename T>
__attribute__((always_inline))
static double mod_load(int64_t idx, double* input, size_t N) {
idx /= sizeof(double);
static T mod_load(int64_t idx, T* input, size_t N) {
idx /= sizeof(T);
return input[idx % N];
}

template<typename T>
__attribute__((noinline))
std::vector<Triple<T>> hess_f(size_t N, T* input) {
std::vector<Triple<T>> triplets;
// input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N);
input = __enzyme_post_sparse_todense<T*>((void*)mod_load<T>, (void*)never_store<T>, input, N);
__builtin_assume(N > 0);
for (size_t i=0; i<N; i++) {
__builtin_assume(i < 100000000);
T* d_input = __enzyme_todense<T*>((void*)ident_load<T>, (void*)ident_store<T>, i);
T* d_dinput = __enzyme_todense<T*>((void*)sparse_load<T>, (void*)sparse_store<T>, i, &triplets);
T* d_dinput = __enzyme_todense<T*>((void*)sparse_load_modn<T>, (void*)sparse_store_modn<T>, i, N, &triplets);

__enzyme_fwddiff<void>((void*)grad_f<T>,
enzyme_const, N,
Expand All @@ -85,6 +89,62 @@ std::vector<triple> hess_f2(size_t N, double* input) {
}
*/

template<typename T>
__attribute__((noinline))
std::vector<Triple<T>> handrolled_hess_f(size_t n, T* data) {
std::vector<Triple<T>> triplets;
int k = 3 * n;
for (int i = 0; i < n; ++i) {
int row = 3 * i;
double x = data[row % k];
double y = data[(row + 1) % k];
double z = data[(row + 2) % k];
double a = data[(row + 3) % k];
double b = data[(row + 4) % k];
double c = data[(row + 5) % k];

double f = pow(x - a, 2) + pow(y - b, 2) + pow(z - c, 2);
// double f = (x - a) * (x - a) + (y - b) * (y - b) + (z - c) * (z - c);
double g = 2 * (1 - 1 / sqrt(f));

// Diagonal terms
#pragma clang loop unroll(full)
for (int j = 0; j < 6; ++j) {
T val = g;
#ifdef BENCHMARK
if (val > -1e-10 && val < 1e-10) continue;
#endif
triplets.emplace_back((row + j) % k, (row + j) % k, val);
}

#pragma clang loop unroll(full)
for (int j = 0; j < 6; ++j) {
T val = -g;
#ifdef BENCHMARK
if (val > -1e-10 && val < 1e-10) continue;
#endif
triplets.emplace_back((row + ((j + 3) % 6)) % k, (row + j) % k, val);
}

// Cross terms
std::vector<double> half_grad_f = {x - a, y - b, z - c, a - x, b - y, c - z};
// double half_grad_f[] = {x - a, y - b, z - c, a - x, b - y, c - z};
#pragma clang loop unroll(full)
for (int j = 0; j < 6; ++j) {
#pragma clang loop unroll(full)
for (int l = 0; l < 6; ++l) {
T val = 2 * half_grad_f[j] * half_grad_f[l] * pow(f, -1.5);
// T val = 2 * half_grad_f[j] * half_grad_f[l] / ( f * sqrt(f) );
#ifdef BENCHMARK
if (val > -1e-10 && val < 1e-10) continue;
#endif
triplets.emplace_back((row + j) % k, (row + l) % k, val);
}
}
}
return triplets;
}

// int argc, char** argv
int main(int argc, char** argv) {
size_t N = 30;
Expand All @@ -93,16 +153,18 @@ int main(int argc, char** argv) {
N = atoi(argv[1]);
}

double *x = (double*)malloc(sizeof(double) * 3 * N);
double *x = (double*)malloc(sizeof(double) * (3 * N + 3));
for (int i = 0; i < N; ++i) {
double angle = 2 * M_PI * i / N;
x[3 * i] = cos(angle) ;//+ normal(generator);
x[3 * i + 1] = sin(angle) ;//+ normal(generator);
x[3 * i + 2] = 0;//normal(generator);
}


for (int i=0; i<10; i++)
{
struct timeval start, end;

gettimeofday(&start, NULL);

auto res = hess_f(N, x);
Expand All @@ -118,6 +180,14 @@ int main(int argc, char** argv) {
printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val);
}

gettimeofday(&start, NULL);
auto hand_res = handrolled_hess_f(N, x);
gettimeofday(&end, NULL);

printf("Handrolled Number of elements %ld\n", hand_res.size());
printf(" Runtime %0.6f\n", tdiff(&start, &end));
}

return 0;
}

Loading