
Commit

Merge pull request ESMCI#1416 from NCAR/ejh_perf_4
more performance test development for async
edhartnett authored May 14, 2019
2 parents 4a57b6c + 7c58bf2 commit 230fadd
Showing 2 changed files with 109 additions and 90 deletions.
2 changes: 1 addition & 1 deletion configure.ac
@@ -2,7 +2,7 @@
 ## Ed Hartnett 8/16/17
 
 # Initialize autoconf and automake.
-AC_INIT(pio, 2.4.2)
+AC_INIT(pio, 2.4.2-development)
 AC_CONFIG_SRCDIR(src/clib/pio_darray.c)
 AM_INIT_AUTOMAKE([foreign serial-tests])
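The switch to 2.4.2-development is the usual post-release version bump: the string passed to AC_INIT is exported by autoconf as PACKAGE_VERSION, so development builds can identify themselves. A minimal sketch of reading it back (PACKAGE_VERSION is a standard autoconf output; the surrounding program is illustrative only):

    /* Sketch: report the configured version. PACKAGE_VERSION comes from
     * AC_INIT via the autoconf-generated config.h. */
    #include "config.h"
    #include <stdio.h>

    int main(void)
    {
        printf("pio version: %s\n", PACKAGE_VERSION); /* e.g. "2.4.2-development" */
        return 0;
    }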
197 changes: 108 additions & 89 deletions tests/cunit/test_async_perf.c
@@ -1,5 +1,5 @@
 /*
- * This program tests darrays with async.
+ * This program tests performance of darray writes with async.
  *
  * @author Ed Hartnett
  * @date 5/4/17
@@ -36,16 +36,21 @@
 #define LON_LEN 3
 
 /* The length of our sample data along each dimension. */
-#define X_DIM_LEN 1024
-#define Y_DIM_LEN 1024
-#define Z_DIM_LEN 124
+#define X_DIM_LEN 4
+#define Y_DIM_LEN 4
+#define Z_DIM_LEN 4
 
 /* The number of timesteps of data to write. */
 #define NUM_TIMESTEPS 3
 
 /* Name of record test var. */
 #define REC_VAR_NAME "Duncan_McCloud_of_the_clan_McCloud"
 
+/* How many different numbers of IO tasks to check? */
+#define MAX_IO_TESTS 5
+
+#define COMPONENT_COUNT 1
+
 char dim_name[NDIM4][PIO_MAX_NAME + 1] = {"unlim", "x", "y", "z"};
 
 /* Length of the dimension. */
@@ -65,27 +70,29 @@
  * @param ioid a pointer that gets the ID of this decomposition.
  * @returns 0 for success, error code otherwise.
  **/
-int create_decomposition_3d(int ntasks, int my_rank, int iosysid, int *ioid)
+int create_decomposition_3d(int ntasks, int my_rank, int iosysid, int *ioid, PIO_Offset *elements_per_pe)
 {
-    PIO_Offset elements_per_pe;  /* Array elements per processing unit. */
+    PIO_Offset my_elem_per_pe;   /* Array elements per processing unit. */
     PIO_Offset *compdof;         /* The decomposition mapping. */
     int dim_len_3d[NDIM3] = {X_DIM_LEN, Y_DIM_LEN, Z_DIM_LEN};
     int my_proc_rank = my_rank - 1;
     int ret;
 
     /* How many data elements per task? */
-    elements_per_pe = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / ntasks;
+    my_elem_per_pe = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / ntasks;
+    if (elements_per_pe)
+        *elements_per_pe = my_elem_per_pe;
 
     /* Allocate space for the decomposition array. */
-    if (!(compdof = malloc(elements_per_pe * sizeof(PIO_Offset))))
+    if (!(compdof = malloc(my_elem_per_pe * sizeof(PIO_Offset))))
         return PIO_ENOMEM;
 
     /* Describe the decomposition. */
-    for (int i = 0; i < elements_per_pe; i++)
-        compdof[i] = my_proc_rank * elements_per_pe + i;
+    for (int i = 0; i < my_elem_per_pe; i++)
+        compdof[i] = my_proc_rank * my_elem_per_pe + i;
 
     /* Create the PIO decomposition for this test. */
-    if ((ret = PIOc_init_decomp(iosysid, PIO_INT, NDIM3, dim_len_3d, elements_per_pe,
+    if ((ret = PIOc_init_decomp(iosysid, PIO_INT, NDIM3, dim_len_3d, my_elem_per_pe,
                                 compdof, ioid, 0, NULL, NULL)))
         ERR(ret);
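The compdof loop above assigns each computation task a contiguous block of the flattened X*Y*Z array. A standalone sketch of the same block arithmetic, using the test's 4x4x4 dimensions and a hypothetical run with 4 computation tasks:

    /* Sketch only: how compdof partitions the data for a hypothetical run with
     * X_DIM_LEN = Y_DIM_LEN = Z_DIM_LEN = 4 and 4 computation tasks.
     * Each task gets 64 / 4 = 16 contiguous elements of the flattened array. */
    #include <stdio.h>

    int main(void)
    {
        int ntasks = 4;                 /* computation tasks (assumption) */
        int nelems = 4 * 4 * 4;         /* X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN */
        int elements_per_pe = nelems / ntasks;

        for (int rank = 0; rank < ntasks; rank++)
            printf("task %d owns elements %d..%d\n", rank,
                   rank * elements_per_pe, (rank + 1) * elements_per_pe - 1);
        return 0;
    }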

Expand All @@ -97,20 +104,20 @@ int create_decomposition_3d(int ntasks, int my_rank, int iosysid, int *ioid)

/* Run a simple test using darrays with async. */
int
run_darray_async_test(int iosysid, int fmt, int my_rank, int ntasks, MPI_Comm test_comm,
MPI_Comm comp_comm, int *flavor, int piotype)
run_darray_async_test(int iosysid, int fmt, int my_rank, int ntasks, int niotasks,
MPI_Comm test_comm, MPI_Comm comp_comm, int *flavor, int piotype)
{
int ioid3;
int dim_len[NDIM4] = {NC_UNLIMITED, X_DIM_LEN, Y_DIM_LEN, Z_DIM_LEN};
PIO_Offset elements_per_pe2 = X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN / 3;
PIO_Offset elements_per_pe2;
char decomp_filename[PIO_MAX_NAME + 1];
int niotasks = 1;
int ret;

sprintf(decomp_filename, "decomp_rdat_%s_.nc", TEST_NAME);

/* Decompose the data over the tasks. */
if ((ret = create_decomposition_3d(ntasks - niotasks, my_rank, iosysid, &ioid3)))
if ((ret = create_decomposition_3d(ntasks - niotasks, my_rank, iosysid, &ioid3,
&elements_per_pe2)))
return ret;

{
@@ -185,12 +192,6 @@
         return ret;
 }
 
-/* Initialize with task 0 as IO task, tasks 1-3 as a
- * computation component. */
-#define NUM_IO_PROCS 1
-#define NUM_COMPUTATION_PROCS 3
-#define COMPONENT_COUNT 1
-
 /* Run Tests for pio_spmd.c functions. */
 int main(int argc, char **argv)
 {
@@ -200,17 +201,17 @@
     int flavor[NUM_FLAVORS];  /* iotypes for the supported netCDF IO flavors. */
     MPI_Comm test_comm;       /* A communicator for this test. */
     int iosysid;
-
-    int num_computation_procs = NUM_COMPUTATION_PROCS;
+    int num_computation_procs;
     MPI_Comm io_comm;                    /* Will get a duplicate of IO communicator. */
     MPI_Comm comp_comm[COMPONENT_COUNT]; /* Will get duplicates of computation communicators. */
-    int niotasks = NUM_IO_PROCS;
+    int num_io_procs[MAX_IO_TESTS] = {1, 4, 16, 64, 128}; /* Number of processors that will do IO. */
+    int num_io_tests; /* How many different numbers of IO procs to try? */
     int mpierr;
+    int fmt, niotest;
     int ret; /* Return code. */
 
     /* Initialize test. */
-    if ((ret = pio_test_init2(argc, argv, &my_rank, &ntasks, MIN_NTASKS,
-                              TARGET_NTASKS, -1, &test_comm)))
+    if ((ret = pio_test_init2(argc, argv, &my_rank, &ntasks, 1, 0, -1, &test_comm)))
         ERR(ERR_INIT);
     if ((ret = PIOc_set_iosystem_error_handling(PIO_DEFAULT, PIO_RETURN_ERROR, NULL)))
         return ret;
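For orientation before the main-loop changes below: PIOc_init_async splits the job so that the first num_io_procs ranks become dedicated IO tasks (they stay inside the library until PIOc_finalize), while the remaining ranks receive a computation communicator and run the test. A minimal sketch of that rank split in plain MPI, not the PIO API, with an assumed num_io_procs of 1:

    /* Sketch: partition MPI_COMM_WORLD into IO and computation tasks,
     * mirroring the async layout this test exercises. */
    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        int my_rank, ntasks;
        int num_io_procs = 1;   /* assumption: one dedicated IO rank */
        MPI_Comm comp_comm;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
        MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

        /* Color 0 = IO tasks, color 1 = computation tasks. */
        int color = (my_rank < num_io_procs) ? 0 : 1;
        MPI_Comm_split(MPI_COMM_WORLD, color, my_rank, &comp_comm);

        printf("rank %d of %d is %s task\n", my_rank, ntasks,
               color ? "a computation" : "an IO");

        MPI_Comm_free(&comp_comm);
        MPI_Finalize();
        return 0;
    }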
@@ -219,74 +220,92 @@
     if ((ret = get_iotypes(&num_flavors, flavor)))
         ERR(ret);
 
-
-    for (int fmt = 0; fmt < num_flavors; fmt++)
-    {
-        struct timeval starttime, endtime;
-        long long startt, endt;
-        long long delta;
-        float num_megabytes;
-        float delta_in_sec;
-        float mb_per_sec;
-
-        /* Start the clock. */
-        if (!my_rank)
-        {
-            gettimeofday(&starttime, NULL);
-            startt = (1000000 * starttime.tv_sec) + starttime.tv_usec;
-        }
-
-        if ((ret = PIOc_init_async(test_comm, niotasks, NULL, COMPONENT_COUNT,
-                                   &num_computation_procs, NULL, &io_comm, comp_comm,
-                                   PIO_REARR_BOX, &iosysid)))
-            ERR(ERR_INIT);
-
-        /* This code runs only on computation components. */
-        if (my_rank)
-        {
-            /* Run the simple darray async test. */
-            if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, test_comm,
-                                             comp_comm[0], flavor, PIO_INT)))
-                return ret;
-
-            /* Finalize PIO system. */
-            if ((ret = PIOc_finalize(iosysid)))
-                return ret;
-
-            /* Free the computation component communicator. */
-            if ((mpierr = MPI_Comm_free(comp_comm)))
-                MPIERR(mpierr);
-        }
-        else
-        {
-            /* Free the IO communicator. */
-            if ((mpierr = MPI_Comm_free(&io_comm)))
-                MPIERR(mpierr);
-        }
-
-        if (!my_rank)
-        {
-            /* Stop the clock. */
-            gettimeofday(&endtime, NULL);
-
-            /* Compute the time delta. */
-            endt = (1000000 * endtime.tv_sec) + endtime.tv_usec;
-            delta = (endt - startt)/NUM_TIMESTEPS;
-            delta_in_sec = (float)delta / 1000000;
-            num_megabytes = (X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN * NUM_TIMESTEPS *
-                             sizeof(int))/(1024*1024);
-            mb_per_sec = num_megabytes / delta_in_sec;
-            printf("%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, niotasks,
-                   1, 0, fmt, delta_in_sec, num_megabytes, mb_per_sec);
-        }
-
-    } /* next fmt */
+    /* How many processors for IO? */
+    num_io_tests = 1;
+    if (ntasks >= 32)
+        num_io_tests = 2;
+    if (ntasks >= 64)
+        num_io_tests = 3;
+    if (ntasks >= 128)
+        num_io_tests = 4;
+    if (ntasks >= 512)
+        num_io_tests = 5;
+
+    if (!my_rank)
+        printf("ntasks\tnio\trearr\tfill\tformat\ttime(s)\tdata size (MB)\t"
+               "performance(MB/s)\n");
+
+    for (niotest = 0; niotest < num_io_tests; niotest++)
+    {
+        num_computation_procs = ntasks - num_io_procs[niotest];
+
+        for (fmt = 0; fmt < num_flavors; fmt++)
+        {
+            struct timeval starttime, endtime;
+            long long startt, endt;
+            long long delta;
+            float num_megabytes;
+            float delta_in_sec;
+            float mb_per_sec;
+
+            /* Start the clock. */
+            if (!my_rank)
+            {
+                gettimeofday(&starttime, NULL);
+                startt = (1000000 * starttime.tv_sec) + starttime.tv_usec;
+            }
+
+            if ((ret = PIOc_init_async(test_comm, num_io_procs[niotest], NULL, COMPONENT_COUNT,
+                                       &num_computation_procs, NULL, &io_comm, comp_comm,
+                                       PIO_REARR_BOX, &iosysid)))
+                ERR(ERR_INIT);
+
+            /* This code runs only on computation components. */
+            if (my_rank >= num_io_procs[niotest])
+            {
+                /* Run the simple darray async test. */
+                if ((ret = run_darray_async_test(iosysid, fmt, my_rank, ntasks, num_io_procs[niotest],
+                                                 test_comm, comp_comm[0], flavor, PIO_INT)))
+                    return ret;
+
+                /* Finalize PIO system. */
+                if ((ret = PIOc_finalize(iosysid)))
+                    return ret;
+
+                /* Free the computation component communicator. */
+                if ((mpierr = MPI_Comm_free(comp_comm)))
+                    MPIERR(mpierr);
+            }
+            else
+            {
+                /* Free the IO communicator. */
+                if ((mpierr = MPI_Comm_free(&io_comm)))
+                    MPIERR(mpierr);
+            }
+
+            if (!my_rank)
+            {
+                /* Stop the clock. */
+                gettimeofday(&endtime, NULL);
+
+                /* Compute the time delta. */
+                endt = (1000000 * endtime.tv_sec) + endtime.tv_usec;
+                delta = (endt - startt)/NUM_TIMESTEPS;
+                delta_in_sec = (float)delta / 1000000;
+                num_megabytes = (X_DIM_LEN * Y_DIM_LEN * Z_DIM_LEN * NUM_TIMESTEPS *
+                                 sizeof(int))/(1024*1024);
+                mb_per_sec = num_megabytes / delta_in_sec;
+                printf("%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, num_io_procs[niotest],
+                       1, 0, fmt, delta_in_sec, num_megabytes, mb_per_sec);
+            }
+
+        } /* next fmt */
+    } /* next niotest */
 
-    /* printf("%d %s SUCCESS!!\n", my_rank, TEST_NAME); */
     /* Finalize the MPI library. */
     if ((ret = pio_test_finalize(&test_comm)))
         return ret;
 
+    printf("%d %s SUCCESS!!\n", my_rank, TEST_NAME);
+
     return 0;
 }
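For reference, the timing logic in the new loop reduces to a few lines of arithmetic. A standalone sketch with hypothetical timestamps; note the megabyte term here is computed in floating point, since with the 4x4x4 test dimensions an all-integer expression would truncate to zero:

    /* Sketch: the MB/s computation from the test, with made-up timestamps. */
    #include <stdio.h>

    int main(void)
    {
        long long startt = 0, endt = 2500000;  /* hypothetical microsecond stamps */
        int num_timesteps = 3;                 /* NUM_TIMESTEPS */
        int x = 4, y = 4, z = 4;               /* X/Y/Z_DIM_LEN */

        long long delta = (endt - startt) / num_timesteps;
        float delta_in_sec = (float)delta / 1000000;
        /* Floating-point division, so small test sizes don't round down to 0 MB. */
        float num_megabytes = (x * y * z * num_timesteps * (float)sizeof(int)) / (1024 * 1024);
        float mb_per_sec = num_megabytes / delta_in_sec;

        printf("%8.3f s  %8.4f MB  %8.4f MB/s\n", delta_in_sec, num_megabytes, mb_per_sec);
        return 0;
    }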
