/*
 * Patch summary (review annotation; this text sits ahead of the first
 * "diff --git" header, i.e. it is preamble and not part of any hunk).
 *
 * NOTE(review): this copy of the patch appears to have lost its original
 * line breaks, its indentation, and the <...> targets of the system
 * #include lines, so it presumably cannot be applied as-is -- regenerate it
 * from the repository before use.
 *
 * Changes, per file:
 *  - CMakeLists.txt: the BLAS_VENDOR cache-string help text now lists the
 *    empty string among the possible choices.
 *  - src/C-interface/dense/bml_add_dense_typed.c: drops a commented-out
 *    "for (int i = 0; i < N * N; i++)" loop header.
 *  - src/C-interface/dense/bml_allocate_dense_typed.c: the nested i/j fill
 *    loop that wrote A_dense[ROWMAJOR(i, j, N, N)] becomes a single loop over
 *    i in [0, N * N) that writes A_dense[i]. rand() is still called inside
 *    the "#pragma omp parallel for" loop.
 *    NOTE(review): rand() is not required to be thread-safe -- confirm this
 *    is acceptable for the random-matrix helper.
 *  - src/C-interface/ellpack/bml_add_ellpack_typed.c: removes commented-out
 *    loop headers and commented-out A_index / xTmp lines, one blank line, and
 *    the declaration "int ind, ind2;".
 *  - tests/CMakeLists.txt: add_matrix_typed.c -> test_bml_add_typed.c in
 *    SOURCES_TYPED, and add_matrix.c -> test_bml_add.c in the bml-test
 *    executable sources.
 *  - tests/add_matrix_typed.c: deleted.
 *  - tests/bml_test.c, tests/bml_test.h: the testers[] entry test_add becomes
 *    test_bml_add; the include of "add_matrix.h" is replaced by
 *    "test_bml_add.h".
 *  - tests/add_matrix.c -> tests/test_bml_add.c and
 *    tests/add_matrix.h -> tests/test_bml_add.h: renamed; test_add and its
 *    four per-precision variants are renamed to test_bml_add*, and the
 *    include guard becomes __TEST_BML_ADD_H.
 *  - tests/test_bml_add_typed.c: new file. TYPED_FUNC(test_bml_add) checks
 *    bml_add(), bml_add_norm(), bml_add_identity() and
 *    bml_scale_add_identity() against values computed from dense exports
 *    (bml_export_to_dense; the deleted test used bml_convert_to_dense) of the
 *    inputs. Mismatches set result = -1 instead of returning immediately, and
 *    every dense buffer and matrix is released before the single return.
 *  - update_tags.sh: file mode 100644 -> 100755.
 *
 * Review notes on the new test:
 *  - Mismatches and the final "add matrix test failed" message are logged
 *    with LOG_INFO; the deleted test logged mismatches with LOG_ERROR.
 *  - NOTE(review): rel_diff divides by `expected` with no guard for
 *    expected == 0 -- confirm the intended handling of zero elements.
 *  - In the bml_add_identity() and bml_scale_add_identity() checks the
 *    mismatch message passes `i` for both "%d" arguments, so the column
 *    index j is never reported.
 *  - expected_norm is accumulated as the sum of C_dense[i] * C_dense[i].
 *    NOTE(review): presumably this matches the value bml_add_norm() returns
 *    -- confirm, in particular for the complex-precision instantiations.
 */
diff --git a/CMakeLists.txt b/CMakeLists.txt index a6f02002c..15026c44b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,7 +160,7 @@ if(BML_INTERNAL_GEMM) endif() set(BLAS_VENDOR "" - CACHE STRING "If set, the preferred BLAS/LAPACK vendor. Possible choices: {Intel,MKL,ACML,GNU}") + CACHE STRING "If set, the preferred BLAS/LAPACK vendor. Possible choices: {,Intel,MKL,ACML,GNU}") if(BLAS_VENDOR STREQUAL "Intel" OR BLAS_VENDOR STREQUAL "MKL") message(STATUS "Attempting to use Intel's BLAS/LAPACK (MKL)") diff --git a/src/C-interface/dense/bml_add_dense_typed.c b/src/C-interface/dense/bml_add_dense_typed.c index 2f1f9abe1..bb0a6287e 100644 --- a/src/C-interface/dense/bml_add_dense_typed.c +++ b/src/C-interface/dense/bml_add_dense_typed.c @@ -85,7 +85,6 @@ double TYPED_FUNC( shared(B_matrix, A_localRowMin, A_localRowMax) \ shared(N, myRank) \ reduction(+:trnorm) - //for (int i = 0; i < N * N; i++) for (int i = A_localRowMin[myRank] * N; i < A_localRowMax[myRank] * N; i++) { diff --git a/src/C-interface/dense/bml_allocate_dense_typed.c b/src/C-interface/dense/bml_allocate_dense_typed.c index 75f199d47..d2feed6b4 100644 --- a/src/C-interface/dense/bml_allocate_dense_typed.c +++ b/src/C-interface/dense/bml_allocate_dense_typed.c @@ -112,12 +112,9 @@ bml_matrix_dense_t *TYPED_FUNC( TYPED_FUNC(bml_zero_matrix_dense) (N, distrib_mode); REAL_T *A_dense = A->matrix; #pragma omp parallel for default(none) shared(A_dense) - for (int i = 0; i < N; i++) + for (int i = 0; i < N * N; i++) { - for (int j = 0; j < N; j++) - { - A_dense[ROWMAJOR(i, j, N, N)] = rand() / (double) RAND_MAX; - } + A_dense[i] = rand() / (double) RAND_MAX; } return A; } diff --git a/src/C-interface/ellpack/bml_add_ellpack_typed.c b/src/C-interface/ellpack/bml_add_ellpack_typed.c index 2c1a0e138..1cf9cd9bd 100644 --- a/src/C-interface/ellpack/bml_add_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_add_ellpack_typed.c @@ -40,7 +40,6 @@ void TYPED_FUNC( int N = A->N; int A_M = A->M; int B_M = B->M; - 
int ix[N], jx[N]; int *A_nnz = A->nnz; @@ -67,7 +66,6 @@ void TYPED_FUNC( shared(A_index, A_value, A_nnz) \ shared(A_localRowMin, A_localRowMax) \ shared(B_index, B_value, B_nnz) - //for (int i = 0; i < N; i++) for (int i = A_localRowMin[myRank]; i < A_localRowMax[myRank]; i++) { int l = 0; @@ -79,7 +77,6 @@ void TYPED_FUNC( { x[k] = 0.0; ix[k] = i + 1; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } @@ -94,7 +91,6 @@ void TYPED_FUNC( { x[k] = 0.0; ix[k] = i + 1; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } @@ -106,7 +102,6 @@ void TYPED_FUNC( for (int jp = 0; jp < l; jp++) { int jind = jx[jp]; - //REAL_T xTmp = x[A_index[ROWMAJOR(i, jp, N, A_M)]]; REAL_T xTmp = x[jind]; if (is_above_threshold(xTmp, threshold)) { @@ -154,8 +149,6 @@ double TYPED_FUNC( int *B_nnz = B->nnz; int *B_index = B->index; - int ind, ind2; - REAL_T x[N]; REAL_T y[N]; REAL_T *A_value = (REAL_T *) A->value; @@ -177,7 +170,6 @@ double TYPED_FUNC( shared(A_localRowMin, A_localRowMax) \ shared(B_index, B_value, B_nnz) \ reduction(+:trnorm) - //for (int i = 0; i < N; i++) for (int i = A_localRowMin[myRank]; i < A_localRowMax[myRank]; i++) { int l = 0; @@ -190,7 +182,6 @@ double TYPED_FUNC( x[k] = 0.0; ix[k] = i + 1; y[k] = 0.0; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } @@ -207,7 +198,6 @@ double TYPED_FUNC( x[k] = 0.0; ix[k] = i + 1; y[k] = 0.0; - //A_index[ROWMAJOR(i, l, N, A_M)] = k; jx[l] = k; l++; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 71a5974b8..9d724f379 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,7 @@ include_directories(${CMAKE_SOURCE_DIR}/src/C-interface) set(SOURCES_TYPED - add_matrix_typed.c + test_bml_add_typed.c adjacency_matrix_typed.c adjungate_triangle_matrix_typed.c allocate_matrix_typed.c @@ -39,7 +39,7 @@ set_target_properties(bmltests POSITION_INDEPENDENT_CODE yes) add_executable(bml-test - add_matrix.c + test_bml_add.c adjacency_matrix.c adjungate_triangle_matrix.c allocate_matrix.c 
diff --git a/tests/add_matrix_typed.c b/tests/add_matrix_typed.c deleted file mode 100644 index c5dd31ff8..000000000 --- a/tests/add_matrix_typed.c +++ /dev/null @@ -1,73 +0,0 @@ -#include "bml.h" -#include "../typed.h" - -#include -#include -#include -#include -#if defined(SINGLE_REAL) || defined(SINGLE_COMPLEX) -#define REL_TOL 1e-6 -#else -#define REL_TOL 1e-12 -#endif - -int TYPED_FUNC( - test_add) ( - const int N, - const bml_matrix_type_t matrix_type, - const bml_matrix_precision_t matrix_precision, - const int M) -{ - bml_matrix_t *A = NULL; - bml_matrix_t *B = NULL; - bml_matrix_t *C = NULL; - - REAL_T *A_dense = NULL; - REAL_T *B_dense = NULL; - REAL_T *C_dense = NULL; - - double alpha = 1.2; - double beta = 0.8; - double threshold = 0.0; - - LOG_DEBUG("rel. tolerance = %e\n", REL_TOL); - - A = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); - B = bml_copy_new(A); - C = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); - - bml_add(B, C, alpha, beta, threshold); - - A_dense = bml_convert_to_dense(A, dense_row_major); - B_dense = bml_convert_to_dense(B, dense_row_major); - C_dense = bml_convert_to_dense(C, dense_row_major); - bml_print_dense_matrix(N, matrix_precision, dense_row_major, A_dense, 0, - N, 0, N); - bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, - N, 0, N); - bml_print_dense_matrix(N, matrix_precision, dense_row_major, C_dense, 0, - N, 0, N); - for (int i = 0; i < N * N; i++) - { - double expected = alpha * A_dense[i] + beta * C_dense[i]; - double rel_diff = ABS((expected - B_dense[i]) / expected); - if (rel_diff > REL_TOL) - { - LOG_ERROR - ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", - i, expected, i, B_dense[i]); - return -1; - } - } - bml_free_memory(A_dense); - bml_free_memory(B_dense); - bml_free_memory(C_dense); - - bml_deallocate(&A); - bml_deallocate(&B); - bml_deallocate(&C); - - LOG_INFO("add matrix test passed\n"); - - return 0; -} diff --git 
a/tests/bml_test.c b/tests/bml_test.c index 1fd6866d0..965237b86 100644 --- a/tests/bml_test.c +++ b/tests/bml_test.c @@ -66,7 +66,7 @@ const char *test_description[] = { }; const test_function_t testers[] = { - test_add, + test_bml_add, test_adjacency, test_adjungate_triangle, test_allocate, diff --git a/tests/bml_test.h b/tests/bml_test.h index 05979f778..2d3bef81e 100644 --- a/tests/bml_test.h +++ b/tests/bml_test.h @@ -10,7 +10,6 @@ typedef int ( const bml_matrix_precision_t matrix_precision, const int M); -#include "add_matrix.h" #include "adjacency_matrix.h" #include "adjungate_triangle_matrix.h" #include "allocate_matrix.h" @@ -27,6 +26,7 @@ typedef int ( #include "scale_matrix.h" #include "set_row.h" #include "submatrix_matrix.h" +#include "test_bml_add.h" #include "test_bml_gemm.h" #include "threshold_matrix.h" #include "trace_matrix.h" diff --git a/tests/add_matrix.c b/tests/test_bml_add.c similarity index 50% rename from tests/add_matrix.c rename to tests/test_bml_add.c index 977035c09..2384d9258 100644 --- a/tests/add_matrix.c +++ b/tests/test_bml_add.c @@ -4,7 +4,7 @@ #include int -test_add( +test_bml_add( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, @@ -13,18 +13,20 @@ test_add( switch (matrix_precision) { case single_real: - return test_add_single_real(N, matrix_type, matrix_precision, M); + return test_bml_add_single_real(N, matrix_type, matrix_precision, + M); break; case double_real: - return test_add_double_real(N, matrix_type, matrix_precision, M); + return test_bml_add_double_real(N, matrix_type, matrix_precision, + M); break; case single_complex: - return test_add_single_complex(N, matrix_type, matrix_precision, - M); + return test_bml_add_single_complex(N, matrix_type, + matrix_precision, M); break; case double_complex: - return test_add_double_complex(N, matrix_type, matrix_precision, - M); + return test_bml_add_double_complex(N, matrix_type, + matrix_precision, M); break; default: 
fprintf(stderr, "unknown matrix precision\n"); diff --git a/tests/add_matrix.h b/tests/test_bml_add.h similarity index 77% rename from tests/add_matrix.h rename to tests/test_bml_add.h index d6bafd4a3..234070ef9 100644 --- a/tests/add_matrix.h +++ b/tests/test_bml_add.h @@ -1,33 +1,33 @@ -#ifndef __ADD_MATRIX_H -#define __ADD_MATRIX_H +#ifndef __TEST_BML_ADD_H +#define __TEST_BML_ADD_H #include -int test_add( +int test_bml_add( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_single_real( +int test_bml_add_single_real( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_double_real( +int test_bml_add_double_real( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_single_complex( +int test_bml_add_single_complex( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, const int M); -int test_add_double_complex( +int test_bml_add_double_complex( const int N, const bml_matrix_type_t matrix_type, const bml_matrix_precision_t matrix_precision, diff --git a/tests/test_bml_add_typed.c b/tests/test_bml_add_typed.c new file mode 100644 index 000000000..0b472e554 --- /dev/null +++ b/tests/test_bml_add_typed.c @@ -0,0 +1,198 @@ +#include "bml.h" +#include "../typed.h" +#include "../macros.h" + +#include +#include +#include +#include +#if defined(SINGLE_REAL) || defined(SINGLE_COMPLEX) +#define REL_TOL 1e-6 +#else +#define REL_TOL 1e-12 +#endif + +int TYPED_FUNC( + test_bml_add) ( + const int N, + const bml_matrix_type_t matrix_type, + const bml_matrix_precision_t matrix_precision, + const int M) +{ + bml_matrix_t *A = NULL; + bml_matrix_t *B = NULL; + bml_matrix_t *C = NULL; + + REAL_T *A_dense = NULL; + REAL_T *B_dense = NULL; + REAL_T *C_dense = NULL; + + int result = 0; + + double norm; + double expected_norm; + + 
double alpha = 1.2; + double beta = 0.8; + double threshold = 0.0; + + LOG_DEBUG("rel. tolerance = %e\n", REL_TOL); + + A = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); + A_dense = bml_export_to_dense(A, dense_row_major); + + LOG_INFO("A_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, A_dense, 0, + N, 0, N); + C = bml_random_matrix(matrix_type, matrix_precision, N, M, sequential); + C_dense = bml_export_to_dense(C, dense_row_major); + + LOG_INFO("C_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, C_dense, 0, + N, 0, N); + LOG_INFO("Testing bml_add()\n"); + + B = bml_copy_new(A); + bml_add(B, C, alpha, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + for (int i = 0; i < N * N; i++) + { + double expected = alpha * A_dense[i] + beta * C_dense[i]; + double rel_diff = ABS((expected - B_dense[i]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[i]); + result = -1; + } + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + LOG_INFO("Testing bml_add_norm()\n"); + + B = bml_copy_new(A); + norm = bml_add_norm(B, C, alpha, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + expected_norm = 0; + for (int i = 0; i < N * N; i++) + { + expected_norm += C_dense[i] * C_dense[i]; + double expected = alpha * A_dense[i] + beta * C_dense[i]; + double rel_diff = ABS((expected - B_dense[i]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[i]); + result = -1; + } + } + if (ABS(expected_norm - norm) / expected_norm > REL_TOL) + { + LOG_INFO("norm 
mismatch: expected = %e, norm = %e\n", expected_norm, norm); + result = -1; + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + LOG_INFO("Testing bml_add_identity()\n"); + + B = bml_copy_new(A); + bml_add_identity(B, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + for (int i = 0; i < N; i++) + { + for (int j = 0; j < N; j++) + { + double expected = 0; + if (i == j) + { + expected = A_dense[ROWMAJOR(i, i, N, N)] + beta; + } + else + { + expected = A_dense[ROWMAJOR(i, j, N, N)]; + } + + double rel_diff = + ABS((expected - B_dense[ROWMAJOR(i, j, N, N)]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[ROWMAJOR(i, j, N, N)]); + result = -1; + } + } + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + LOG_INFO("Testing bml_scale_add_identity()\n"); + + B = bml_copy_new(A); + bml_scale_add_identity(B, alpha, beta, threshold); + B_dense = bml_export_to_dense(B, dense_row_major); + + LOG_INFO("B_dense:\n"); + bml_print_dense_matrix(N, matrix_precision, dense_row_major, B_dense, 0, + N, 0, N); + for (int i = 0; i < N; i++) + { + for (int j = 0; j < N; j++) + { + double expected = 0; + if (i == j) + { + expected = alpha * A_dense[ROWMAJOR(i, i, N, N)] + beta; + } + else + { + expected = alpha * A_dense[ROWMAJOR(i, j, N, N)]; + } + + double rel_diff = + ABS((expected - B_dense[ROWMAJOR(i, j, N, N)]) / expected); + if (rel_diff > REL_TOL) + { + LOG_INFO + ("matrices are not identical; expected[%d] = %e, B[%d] = %e\n", + i, expected, i, B_dense[ROWMAJOR(i, j, N, N)]); + result = -1; + } + } + } + bml_free_memory(B_dense); + bml_deallocate(&B); + + bml_free_memory(A_dense); + bml_free_memory(C_dense); + + bml_deallocate(&A); + bml_deallocate(&C); + + if (result == 0) + { + LOG_INFO("add matrix test passed\n"); + } + else + { + 
LOG_INFO("add matrix test failed\n"); + } + + return result; +} diff --git a/update_tags.sh b/update_tags.sh old mode 100644 new mode 100755