Skip to content

Commit

Permalink
Merge pull request ESMCI#1618 from NCAR/ejh_ncint_perf
Browse files Browse the repository at this point in the history
For netCDF integration opens/creates of PIO_IOTYPE_NETCDF4C, don't automatically turn on deflation for every var
  • Loading branch information
edwardhartnett authored Dec 3, 2019
2 parents 751ff3d + 65bef9b commit b9b900b
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 5 deletions.
5 changes: 5 additions & 0 deletions src/clib/pio.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,11 @@ typedef struct file_desc_t
/** True if this task should participate in IO (only true for one
* task with netcdf serial files). */
int do_io;

/** True if this file was opened with the netCDF integration
* feature. One consequence is that PIO_IOTYPE_NETCDF4C files will
* not have deflate automatically turned on for each var. */
int ncint_file;
} file_desc_t;

/**
Expand Down
6 changes: 4 additions & 2 deletions src/clib/pio_nc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2278,8 +2278,10 @@ PIOc_def_var(int ncid, const char *name, nc_type xtype, int ndims,
PLOG((3, "defined var ierr %d file->iotype %d", ierr, file->iotype));

#ifdef _NETCDF4
/* For netCDF-4 serial files, turn on compression for this variable. */
if (!ierr && file->iotype == PIO_IOTYPE_NETCDF4C)
/* For netCDF-4 serial files, turn on compression for this
* variable, unless this file was opened through the netCDF
* integration feature. */
if (!ierr && file->iotype == PIO_IOTYPE_NETCDF4C && !file->ncint_file)
ierr = nc_def_var_deflate(file->fh, varid, 0, 1, 1);

/* For netCDF-4 parallel files, set parallel access to collective. */
Expand Down
2 changes: 2 additions & 0 deletions src/clib/pioc_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -2134,6 +2134,7 @@ PIOc_createfile_int(int iosysid, int *ncidp, int *iotype, const char *filename,
if ((ierr = nc4_file_change_ncid(*ncidp, file->pio_ncid)))
return pio_err(NULL, file, ierr, __FILE__, __LINE__);
file->pio_ncid = file->pio_ncid << ID_SHIFT;
file->ncint_file++;
PLOG((2, "changed ncid to file->pio_ncid = %d", file->pio_ncid));
}
#endif /* NETCDF_INTEGRATION */
Expand Down Expand Up @@ -2807,6 +2808,7 @@ PIOc_openfile_retry(int iosysid, int *ncidp, int *iotype, const char *filename,
if ((ierr = nc4_file_change_ncid(*ncidp, file->pio_ncid)))
return pio_err(NULL, file, ierr, __FILE__, __LINE__);
file->pio_ncid = file->pio_ncid << ID_SHIFT;
file->ncint_file++;
PLOG((2, "changed ncid to file->pio_ncid = %d", file->pio_ncid));
}
else
Expand Down
8 changes: 5 additions & 3 deletions tests/ncint/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,21 @@ AM_CPPFLAGS = -I$(top_srcdir)/src/clib
LDADD = ${top_builddir}/src/clib/libpioc.la

# Build the test for make check.
check_PROGRAMS = tst_pio_udf tst_pio_async tst_async_multi
check_PROGRAMS = tst_pio_udf tst_pio_async tst_async_multi \
tst_ncint_perf

tst_pio_udf_SOURCES = tst_pio_udf.c pio_err_macros.h
tst_pio_async_SOURCES = tst_pio_async.c pio_err_macros.h
tst_async_multi_SOURCES = tst_async_multi.c pio_err_macros.h
tst_ncint_perf_SOURCES = tst_ncint_perf.c pio_err_macros.h

if RUN_TESTS
# Tests will run from a bash script.
TESTS = run_tests.sh
TESTS = run_tests.sh run_perf.sh
endif # RUN_TESTS

# Distribute the test script.
EXTRA_DIST = run_tests.sh
EXTRA_DIST = run_tests.sh run_perf.sh

# Clean up files produced during testing.
CLEANFILES = *.nc *.log
2 changes: 2 additions & 0 deletions tests/ncint/pio_err_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,6 @@ static int total_err = 0, err = 0;
return 0; \
} while (0)

#define ERR_WRONG 99

#endif /* _PIO_ERR_MACROS_H */
33 changes: 33 additions & 0 deletions tests/ncint/run_perf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/sh

# Driver script for the PIO netCDF integration performance tests.
# Each program named in PIO_TESTS is launched on 4 MPI tasks; the
# script exits with status 1 as soon as one of them fails, and with
# status 0 when all of them have succeeded.

# Ed Hartnett

# Abort the script if a command returns an error.
set -e

# Leave the script (and so the loop) on ctrl-c or termination.
trap exit INT TERM

printf 'running PIO performance tests...\n'

PIO_TESTS='tst_ncint_perf'

for TEST in $PIO_TESTS
do
    echo "running ${TEST}"
    # A command tested by "if" does not trigger "set -e", so a
    # failing test reaches the explicit exit below.
    if ! mpiexec -n 4 ./${TEST}; then
        exit 1
    fi
done

# Every test passed.
exit 0
147 changes: 147 additions & 0 deletions tests/ncint/tst_ncint_perf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/* Test netcdf integration layer.
This is a performance test of async mode in PIO, using the netCDF
integration layer.
Ed Hartnett
12/2/19
*/

#include "config.h"
#include <pio.h>
#include <sys/time.h>
#include "pio_err_macros.h"

/* Name of the file written by this test. It is named after this test
 * rather than reusing "tst_pio_async.nc", so that this test cannot
 * overwrite (or race with) a file of that name produced by another
 * test in the same directory. NOTE(review): confirm tst_pio_async
 * writes tst_pio_async.nc. */
#define FILE_NAME "tst_ncint_perf.nc"

/* Names of the variable and dimensions defined in the file. */
#define VAR_NAME "data_var"
#define DIM_NAME_UNLIMITED "dim_unlimited"
#define DIM_NAME_X "dim_x"
#define DIM_NAME_Y "dim_y"

/* Lengths of the two fixed dimensions. For a quick debugging run,
 * small values such as 3 and 4 can be substituted here. */
#define DIM_LEN_X 3072
#define DIM_LEN_Y 1536

/* Dimension counts: NDIM2 for the decomposition, NDIM3 for the
 * record variable (unlimited dimension plus the two fixed ones). */
#define NDIM2 2
#define NDIM3 3

/* Number of records written and timed. */
#define NUM_TIMESTEPS 1

/* Declaration of a dispatch object defined outside this file;
 * presumably the dispatch table of the netCDF integration layer.
 * NOTE(review): main() in this file does not reference it - confirm
 * whether this declaration is still needed. */
extern NC_Dispatch NCINT_dispatcher;

/* Number of computational components to create. This is also the
 * length of the per-component task-count array passed to
 * nc_def_async() in main(). */
#define COMPONENT_COUNT 1

/**
 * Performance test of async mode through the netCDF integration
 * layer.
 *
 * Every task initializes MPI and calls nc_def_async(). Tasks with a
 * non-zero rank then create FILE_NAME with one 3D record variable,
 * define a decomposition that gives each of them an equal contiguous
 * slice of the DIM_LEN_X x DIM_LEN_Y plane, write NUM_TIMESTEPS
 * records with nc_put_vard_int(), and close the file. The write loop
 * plus the close is timed, and the task whose rank equals
 * num_io_procs prints one tab-separated result line.
 *
 * @param argc argument count, passed on to MPI_Init().
 * @param argv argument vector, passed on to MPI_Init().
 * @return whatever the PERR / PFINAL_RESULTS macros from
 * pio_err_macros.h return.
 */
int
main(int argc, char **argv)
{
    int my_rank;
    int ntasks;

    /* Initialize MPI. */
    if (MPI_Init(&argc, &argv)) PERR;

    /* Learn my rank and the total number of processors. */
    if (MPI_Comm_rank(MPI_COMM_WORLD, &my_rank)) PERR;
    if (MPI_Comm_size(MPI_COMM_WORLD, &ntasks)) PERR;

    if (!my_rank)
    {
        printf("\n*** Testing netCDF integration PIO performance.\n");
        printf("*** testing simple async use of netCDF integration layer...");
    }
    {
        int ncid, ioid;
        int dimid[NDIM3], varid;
        int dimlen[NDIM3] = {NC_UNLIMITED, DIM_LEN_X, DIM_LEN_Y};
        int iosysid;
        size_t elements_per_pe;
        size_t *compdof; /* The decomposition mapping. */
        int *my_data;
        int num_procs2[COMPONENT_COUNT] = {3};
        int num_io_procs = 1;
        size_t i;

        /* To turn on logging for the PIO library while debugging,
         * call PIOc_set_log_level(4) here, and nc_set_log_level(3)
         * on rank 0. */

        /* Initialize the intracomm. The IO task will not return from
         * this call until the PIOc_finalize() is called by the
         * compute tasks. */
        if (nc_def_async(MPI_COMM_WORLD, num_io_procs, NULL, COMPONENT_COUNT,
                         num_procs2, NULL, NULL, NULL, PIO_REARR_BOX, &iosysid))
            PERR;

        if (my_rank)
        {
            struct timeval starttime, endtime;
            long long startt, endt;
            long long elapsed_usec; /* Whole timed region, in microseconds. */
            long long delta;        /* Average per timestep, in microseconds. */
            float num_megabytes = DIM_LEN_X * DIM_LEN_Y * sizeof(int) / (float)1000000 * NUM_TIMESTEPS;
            float delta_in_sec;
            float mb_per_sec;
            int t;

            /* Create a file with a 3D record var. */
            if (nc_create(FILE_NAME, NC_PIO|NC_NETCDF4, &ncid)) PERR;
            if (nc_def_dim(ncid, DIM_NAME_UNLIMITED, dimlen[0], &dimid[0])) PERR;
            if (nc_def_dim(ncid, DIM_NAME_X, dimlen[1], &dimid[1])) PERR;
            if (nc_def_dim(ncid, DIM_NAME_Y, dimlen[2], &dimid[2])) PERR;
            if (nc_def_var(ncid, VAR_NAME, NC_INT, NDIM3, dimid, &varid)) PERR;
            if (nc_enddef(ncid)) PERR;

            /* Calculate a decomposition for distributed arrays: the
             * plane is split evenly over the non-IO tasks. */
            elements_per_pe = DIM_LEN_X * DIM_LEN_Y / (ntasks - num_io_procs);

            if (!(compdof = malloc(elements_per_pe * sizeof(size_t))))
                PERR;
            for (i = 0; i < elements_per_pe; i++)
                compdof[i] = (my_rank - num_io_procs) * elements_per_pe + i;

            /* Create the PIO decomposition for this test. */
            if (nc_def_decomp(iosysid, PIO_INT, NDIM2, &dimlen[1], elements_per_pe,
                              compdof, &ioid, 1, NULL, NULL)) PERR;
            free(compdof);

            /* Create some data on this processor. */
            if (!(my_data = malloc(elements_per_pe * sizeof(int)))) PERR;
            for (i = 0; i < elements_per_pe; i++)
                my_data[i] = my_rank * 10 + (int)i;

            /* Start the clock. */
            gettimeofday(&starttime, NULL);

            /* Write some data with distributed arrays. */
            for (t = 0; t < NUM_TIMESTEPS; t++)
                if (nc_put_vard_int(ncid, varid, ioid, t, my_data)) PERR;
            if (nc_close(ncid)) PERR;

            /* Stop the clock. */
            gettimeofday(&endtime, NULL);

            /* Compute the time delta. The multiplication is done in
             * long long so it cannot overflow where tv_sec is a
             * 32-bit type. */
            startt = 1000000LL * starttime.tv_sec + starttime.tv_usec;
            endt = 1000000LL * endtime.tv_sec + endtime.tv_usec;
            elapsed_usec = endt - startt;
            delta = elapsed_usec / NUM_TIMESTEPS;
            delta_in_sec = (float)delta / 1000000;

            /* num_megabytes covers all timesteps, so the rate must be
             * taken over the total elapsed time, not the per-timestep
             * average. */
            mb_per_sec = num_megabytes / ((float)elapsed_usec / 1000000);

            /* One task reports: total tasks, IO tasks, three fixed
             * columns, seconds per timestep, megabytes written, and
             * megabytes per second. */
            if (my_rank == num_io_procs)
                printf("\n%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, num_io_procs,
                       1, 0, 1, delta_in_sec, num_megabytes, mb_per_sec);

            free(my_data);
            if (nc_free_decomp(ioid)) PERR;
            if (nc_free_iosystem(iosysid)) PERR;
        }
    }
    if (!my_rank)
        PSUMMARIZE_ERR;

    /* Finalize MPI. */
    MPI_Finalize();
    PFINAL_RESULTS;
}

0 comments on commit b9b900b

Please sign in to comment.