From 0c40d0bee41546c7f8c9bff010bed4e22b8d51d0 Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 06:57:42 -0600 Subject: [PATCH 1/8] changed comment --- tests/cunit/test_async_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index 74b21e0ddd8..45f304167be 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -1,5 +1,5 @@ /* - * This program tests darrays with async. + * This program tests performance of darray writes with async. * * @author Ed Hartnett * @date 5/4/17 From e70aed50f04346fa95abbf6682306dac39088632 Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 09:15:34 -0600 Subject: [PATCH 2/8] made data smaller --- tests/cunit/test_async_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index 45f304167be..da20753eefe 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -36,9 +36,9 @@ #define LON_LEN 3 /* The length of our sample data along each dimension. */ -#define X_DIM_LEN 1024 -#define Y_DIM_LEN 1024 -#define Z_DIM_LEN 124 +#define X_DIM_LEN 128 +#define Y_DIM_LEN 128 +#define Z_DIM_LEN 128 /* The number of timesteps of data to write. */ #define NUM_TIMESTEPS 3 From adf2b3e6989e5fdd5ead563e9aa8183b6c2e7e32 Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 09:16:35 -0600 Subject: [PATCH 3/8] perf test development --- tests/cunit/test_async_perf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index da20753eefe..160c079477c 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -97,14 +97,13 @@ int create_decomposition_3d(int ntasks, int my_rank, int iosysid, int *ioid) /* Run a simple test using darrays with async. */ int -run_darray_async_test(int iosysid, int fmt, int my_rank, int ntasks, MPI_Comm test_comm, - MPI_Comm comp_comm, int *flavor, int piotype) +run_darray_async_test(int iosysid, int fmt, int my_rank, int ntasks, int niotasks, + MPI_Comm test_comm, MPI_Comm comp_comm, int *flavor, int piotype) { int ioid3; int dim_len[NDIM4] = {NC_UNLIMITED, X_DIM_LEN, Y_DIM_LEN, Z_DIM_LEN}; PIO_Offset elements_per_pe2 = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / 3; char decomp_filename[PIO_MAX_NAME + 1]; - int niotasks = 1; int ret; sprintf(decomp_filename, "decomp_rdat_%s_.nc", TEST_NAME); @@ -200,7 +199,6 @@ int main(int argc, char **argv) int flavor[NUM_FLAVORS]; /* iotypes for the supported netCDF IO flavors. */ MPI_Comm test_comm; /* A communicator for this test. */ int iosysid; - int num_computation_procs = NUM_COMPUTATION_PROCS; MPI_Comm io_comm; /* Will get a duplicate of IO communicator. */ MPI_Comm comp_comm[COMPONENT_COUNT]; /* Will get duplicates of computation communicators. */ @@ -245,8 +243,8 @@ int main(int argc, char **argv) if (my_rank) { /* Run the simple darray async test. */ - if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, test_comm, - comp_comm[0], flavor, PIO_INT))) + if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, niotasks, + test_comm, comp_comm[0], flavor, PIO_INT))) return ret; /* Finalize PIO system. */ From fd16161fce046b816f41c269c2998c343cee629e Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 09:22:34 -0600 Subject: [PATCH 4/8] perf test development --- tests/cunit/test_async_perf.c | 142 +++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 63 deletions(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index 160c079477c..e005c1ae89d 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -46,6 +46,9 @@ /* Name of record test var. */ #define REC_VAR_NAME "Duncan_McCloud_of_the_clan_McCloud" +/* How many different number of IO tasks to check? */ +#define MAX_IO_TESTS 5 + char dim_name[NDIM4][PIO_MAX_NAME + 1] = {"unlim", "x", "y", "z"}; /* Length of the dimension. */ @@ -202,13 +205,14 @@ int main(int argc, char **argv) int num_computation_procs = NUM_COMPUTATION_PROCS; MPI_Comm io_comm; /* Will get a duplicate of IO communicator. */ MPI_Comm comp_comm[COMPONENT_COUNT]; /* Will get duplicates of computation communicators. */ - int niotasks = NUM_IO_PROCS; + int num_io_procs[MAX_IO_TESTS] = {1, 4, 16, 64, 128}; /* Number of processors that will do IO. */ + int num_io_tests; /* How many different num IO procs to try? */ int mpierr; + int fmt, niotest; int ret; /* Return code. */ /* Initialize test. */ - if ((ret = pio_test_init2(argc, argv, &my_rank, &ntasks, MIN_NTASKS, - TARGET_NTASKS, -1, &test_comm))) + if ((ret = pio_test_init2(argc, argv, &my_rank, &ntasks, 1, 0, -1, &test_comm))) ERR(ERR_INIT); if ((ret = PIOc_set_iosystem_error_handling(PIO_DEFAULT, PIO_RETURN_ERROR, NULL))) return ret; @@ -217,68 +221,80 @@ int main(int argc, char **argv) if ((ret = get_iotypes(&num_flavors, flavor))) ERR(ret); - - for (int fmt = 0; fmt < num_flavors; fmt++) + /* How many processors for IO? */ + num_io_tests = 1; + if (ntasks >= 32) + num_io_tests = 2; + if (ntasks >= 64) + num_io_tests = 3; + if (ntasks >= 128) + num_io_tests = 4; + if (ntasks >= 512) + num_io_tests = 5; + + for (niotest = 0; niotest < num_io_tests; niotest++) { - struct timeval starttime, endtime; - long long startt, endt; - long long delta; - float num_megabytes; - float delta_in_sec; - float mb_per_sec; - - /* Start the clock. */ - if (!my_rank) + for (fmt = 0; fmt < num_flavors; fmt++) { - gettimeofday(&starttime, NULL); - startt = (1000000 * starttime.tv_sec) + starttime.tv_usec; - } - - if ((ret = PIOc_init_async(test_comm, niotasks, NULL, COMPONENT_COUNT, - &num_computation_procs, NULL, &io_comm, comp_comm, - PIO_REARR_BOX, &iosysid))) - ERR(ERR_INIT); - - /* This code runs only on computation components. */ - if (my_rank) - { - /* Run the simple darray async test. */ - if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, niotasks, - test_comm, comp_comm[0], flavor, PIO_INT))) - return ret; - - /* Finalize PIO system. */ - if ((ret = PIOc_finalize(iosysid))) - return ret; - - /* Free the computation conomponent communicator. */ - if ((mpierr = MPI_Comm_free(comp_comm))) - MPIERR(mpierr); - } - else - { - /* Free the IO communicator. */ - if ((mpierr = MPI_Comm_free(&io_comm))) - MPIERR(mpierr); - } - - if (!my_rank) - { - /* Stop the clock. */ - gettimeofday(&endtime, NULL); - - /* Compute the time delta */ - endt = (1000000 * endtime.tv_sec) + endtime.tv_usec; - delta = (endt - startt)/NUM_TIMESTEPS; - delta_in_sec = (float)delta / 1000000; - num_megabytes = (X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN * NUM_TIMESTEPS * - sizeof(int))/(1024*1024); - mb_per_sec = num_megabytes / delta_in_sec; - printf("%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, niotasks, - 1, 0, fmt, delta_in_sec, num_megabytes, mb_per_sec); - } - - } /* next fmt */ + struct timeval starttime, endtime; + long long startt, endt; + long long delta; + float num_megabytes; + float delta_in_sec; + float mb_per_sec; + + /* Start the clock. */ + if (!my_rank) + { + gettimeofday(&starttime, NULL); + startt = (1000000 * starttime.tv_sec) + starttime.tv_usec; + } + + if ((ret = PIOc_init_async(test_comm, num_io_procs[niotest], NULL, COMPONENT_COUNT, + &num_computation_procs, NULL, &io_comm, comp_comm, + PIO_REARR_BOX, &iosysid))) + ERR(ERR_INIT); + + /* This code runs only on computation components. */ + if (my_rank) + { + /* Run the simple darray async test. */ + if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, num_io_procs[niotest], + test_comm, comp_comm[0], flavor, PIO_INT))) + return ret; + + /* Finalize PIO system. */ + if ((ret = PIOc_finalize(iosysid))) + return ret; + + /* Free the computation conomponent communicator. */ + if ((mpierr = MPI_Comm_free(comp_comm))) + MPIERR(mpierr); + } + else + { + /* Free the IO communicator. */ + if ((mpierr = MPI_Comm_free(&io_comm))) + MPIERR(mpierr); + } + + if (!my_rank) + { + /* Stop the clock. */ + gettimeofday(&endtime, NULL); + + /* Compute the time delta */ + endt = (1000000 * endtime.tv_sec) + endtime.tv_usec; + delta = (endt - startt)/NUM_TIMESTEPS; + delta_in_sec = (float)delta / 1000000; + num_megabytes = (X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN * NUM_TIMESTEPS * + sizeof(int))/(1024*1024); + mb_per_sec = num_megabytes / delta_in_sec; + printf("%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, num_io_procs[niotest], + 1, 0, fmt, delta_in_sec, num_megabytes, mb_per_sec); + } + } /* next fmt */ + } /* next niotest */ /* Finalize the MPI library. */ if ((ret = pio_test_finalize(&test_comm))) From a074378dd9545c1134a39b8e972890ad8c90d52b Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 09:23:15 -0600 Subject: [PATCH 5/8] perf test development --- tests/cunit/test_async_perf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index e005c1ae89d..603892dbe63 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -232,6 +232,10 @@ int main(int argc, char **argv) if (ntasks >= 512) num_io_tests = 5; + if (!my_rank) + printf("ntasks\tnio\trearr\tfill\tformat\ttime(s)\tdata size (MB)\t" + "performance(MB/s)\n"); + for (niotest = 0; niotest < num_io_tests; niotest++) { for (fmt = 0; fmt < num_flavors; fmt++) @@ -300,7 +304,7 @@ int main(int argc, char **argv) if ((ret = pio_test_finalize(&test_comm))) return ret; - printf("%d %s SUCCESS!!\n", my_rank, TEST_NAME); + /* printf("%d %s SUCCESS!!\n", my_rank, TEST_NAME); */ return 0; } From a75df83f721403930f594e0313e875b4cb4516d9 Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 09:43:55 -0600 Subject: [PATCH 6/8] perf test development --- tests/cunit/test_async_perf.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index 603892dbe63..c6de105620e 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -49,6 +49,8 @@ /* How many different number of IO tasks to check? */ #define MAX_IO_TESTS 5 +#define COMPONENT_COUNT 1 + char dim_name[NDIM4][PIO_MAX_NAME + 1] = {"unlim", "x", "y", "z"}; /* Length of the dimension. */ @@ -68,27 +70,29 @@ char dim_name[NDIM4][PIO_MAX_NAME + 1] = {"unlim", "x", "y", "z"}; * @param ioid a pointer that gets the ID of this decomposition. * @returns 0 for success, error code otherwise. **/ -int create_decomposition_3d(int ntasks, int my_rank, int iosysid, int *ioid) +int create_decomposition_3d(int ntasks, int my_rank, int iosysid, int *ioid, PIO_Offset *elements_per_pe) { - PIO_Offset elements_per_pe; /* Array elements per processing unit. */ + PIO_Offset my_elem_per_pe; /* Array elements per processing unit. */ PIO_Offset *compdof; /* The decomposition mapping. */ int dim_len_3d[NDIM3] = {X_DIM_LEN, Y_DIM_LEN, Z_DIM_LEN}; int my_proc_rank = my_rank - 1; int ret; /* How many data elements per task? */ - elements_per_pe = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / ntasks; + my_elem_per_pe = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / ntasks; + if (elements_per_pe) + *elements_per_pe = my_elem_per_pe; /* Allocate space for the decomposition array. */ - if (!(compdof = malloc(elements_per_pe * sizeof(PIO_Offset)))) + if (!(compdof = malloc(my_elem_per_pe * sizeof(PIO_Offset)))) return PIO_ENOMEM; /* Describe the decomposition. */ - for (int i = 0; i < elements_per_pe; i++) - compdof[i] = my_proc_rank * elements_per_pe + i; + for (int i = 0; i < my_elem_per_pe; i++) + compdof[i] = my_proc_rank * my_elem_per_pe + i; /* Create the PIO decomposition for this test. */ - if ((ret = PIOc_init_decomp(iosysid, PIO_INT, NDIM3, dim_len_3d, elements_per_pe, + if ((ret = PIOc_init_decomp(iosysid, PIO_INT, NDIM3, dim_len_3d, my_elem_per_pe, compdof, ioid, 0, NULL, NULL))) ERR(ret); @@ -105,14 +109,15 @@ run_darray_async_test(int iosysid, int fmt, int my_rank, int ntasks, int niotask { int ioid3; int dim_len[NDIM4] = {NC_UNLIMITED, X_DIM_LEN, Y_DIM_LEN, Z_DIM_LEN}; - PIO_Offset elements_per_pe2 = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / 3; + PIO_Offset elements_per_pe2; char decomp_filename[PIO_MAX_NAME + 1]; int ret; sprintf(decomp_filename, "decomp_rdat_%s_.nc", TEST_NAME); /* Decompose the data over the tasks. */ - if ((ret = create_decomposition_3d(ntasks - niotasks, my_rank, iosysid, &ioid3))) + if ((ret = create_decomposition_3d(ntasks - niotasks, my_rank, iosysid, &ioid3, + &elements_per_pe2))) return ret; { @@ -187,12 +192,6 @@ run_darray_async_test(int iosysid, int fmt, int my_rank, int ntasks, int niotask return ret; } -/* Initialize with task 0 as IO task, tasks 1-3 as a - * computation component. */ -#define NUM_IO_PROCS 1 -#define NUM_COMPUTATION_PROCS 3 -#define COMPONENT_COUNT 1 - /* Run Tests for pio_spmd.c functions. */ int main(int argc, char **argv) { @@ -202,7 +201,7 @@ int main(int argc, char **argv) int flavor[NUM_FLAVORS]; /* iotypes for the supported netCDF IO flavors. */ MPI_Comm test_comm; /* A communicator for this test. */ int iosysid; - int num_computation_procs = NUM_COMPUTATION_PROCS; + int num_computation_procs; MPI_Comm io_comm; /* Will get a duplicate of IO communicator. */ MPI_Comm comp_comm[COMPONENT_COUNT]; /* Will get duplicates of computation communicators. */ int num_io_procs[MAX_IO_TESTS] = {1, 4, 16, 64, 128}; /* Number of processors that will do IO. */ @@ -238,6 +237,8 @@ int main(int argc, char **argv) for (niotest = 0; niotest < num_io_tests; niotest++) { + num_computation_procs = ntasks - num_io_procs[niotest]; + for (fmt = 0; fmt < num_flavors; fmt++) { struct timeval starttime, endtime; From 7b056377f594728bb072dd44ffa28c90fdb1bbf0 Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 10:10:32 -0600 Subject: [PATCH 7/8] perf test development --- tests/cunit/test_async_perf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/cunit/test_async_perf.c b/tests/cunit/test_async_perf.c index c6de105620e..af66c4e95bc 100644 --- a/tests/cunit/test_async_perf.c +++ b/tests/cunit/test_async_perf.c @@ -36,9 +36,9 @@ #define LON_LEN 3 /* The length of our sample data along each dimension. */ -#define X_DIM_LEN 128 -#define Y_DIM_LEN 128 -#define Z_DIM_LEN 128 +#define X_DIM_LEN 4 +#define Y_DIM_LEN 4 +#define Z_DIM_LEN 4 /* The number of timesteps of data to write. */ #define NUM_TIMESTEPS 3 @@ -261,7 +261,7 @@ int main(int argc, char **argv) ERR(ERR_INIT); /* This code runs only on computation components. */ - if (my_rank) + if (my_rank >= num_io_procs[niotest]) { /* Run the simple darray async test. */ if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, num_io_procs[niotest], @@ -298,14 +298,14 @@ int main(int argc, char **argv) printf("%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, num_io_procs[niotest], 1, 0, fmt, delta_in_sec, num_megabytes, mb_per_sec); } + } /* next fmt */ } /* next niotest */ + /* printf("%d %s SUCCESS!!\n", my_rank, TEST_NAME); */ /* Finalize the MPI library. */ if ((ret = pio_test_finalize(&test_comm))) return ret; - /* printf("%d %s SUCCESS!!\n", my_rank, TEST_NAME); */ - return 0; } From 7c58bf2147f0ea6b07a1c5640fa894b6390220d3 Mon Sep 17 00:00:00 2001 From: Ed Hartnett Date: Tue, 14 May 2019 11:37:51 -0600 Subject: [PATCH 8/8] added development to version --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 2d342faf2db..714b186cccc 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ ## Ed Hartnett 8/16/17 # Initialize autoconf and automake. -AC_INIT(pio, 2.4.2) +AC_INIT(pio, 2.4.2-development) AC_CONFIG_SRCDIR(src/clib/pio_darray.c) AM_INIT_AUTOMAKE([foreign serial-tests])