From 55b17af76a2c54eb2b2ac8c5517c96d89c2e67cb Mon Sep 17 00:00:00 2001 From: Nicholas Persley Date: Wed, 7 Aug 2024 14:22:01 -0400 Subject: [PATCH 1/2] New input - `deflateLevel` & custom chunking - Addresses issue #425 - Previously, when outputting to netCDFs, the bigger the domain, the longer it took to write output after every simulation run - Created a new input within `Input_nc/desc_nc.in` - "deflateLevel" that specifies the level of deflation that will be sent into `nc_def_var_deflate()` * If the value is 0 (default), variables will not have any kind of deflation * Otherwise, the values 1-9 specify the intensity of deflation that will occur - New variable within SW_NETCDF to hold this new input - set to 0 in `SW_NC_init_ptrs()` - Added custom chunking to every non-dimensional variable (e.g., variables that are not time, vert, lat/lon, x/y, etc.) * Previously, the deflation would provide chunk sizes that encompass the entire domain - E.g., chunk size = [10, 10, 366] for a variable with xy = 10 x 10 sites and time = one leap year - The preferred chunk size for the example above should be chunk size = [1, 1, 366] - New function - `writeDummyVal` which is used to write a dummy value to a variable within the given netCDF and detects which type to write * Only use if `deflateLevel` is 0 --- include/SW_datastructs.h | 4 + src/SW_netCDF.c | 172 +++++++++++++++++++++++++++--- tests/example/Input_nc/desc_nc.in | 1 + 3 files changed, 163 insertions(+), 14 deletions(-) diff --git a/include/SW_datastructs.h b/include/SW_datastructs.h index 6cfc6dde2..19fa22ebe 100644 --- a/include/SW_datastructs.h +++ b/include/SW_datastructs.h @@ -1132,6 +1132,10 @@ typedef struct { int baseCalendarYear; /**< Calendar year that is the reference basis of the time units (e.g., days since YYYY-01-01) of every output netCDFs */ + + /* Specify the deflation level for when creating the output variables */ + int deflateLevel; + } SW_NETCDF; struct SW_OUT_DOM { diff --git a/src/SW_netCDF.c 
b/src/SW_netCDF.c index 650f7a422..52d17f01a 100644 --- a/src/SW_netCDF.c +++ b/src/SW_netCDF.c @@ -31,7 +31,7 @@ #define NUM_NC_IN_KEYS 2 /** Number of possible keys within `attributes_nc.in` */ -#define NUM_ATT_IN_KEYS 27 +#define NUM_ATT_IN_KEYS 28 /** Progress status: SUID is ready for simulation */ #define PRGRSS_READY ((signed char) 0) @@ -222,7 +222,8 @@ static void nc_read_atts( "proj_false_northing", "strideOutYears", - "baseCalendarYear" + "baseCalendarYear", + "deflateLevel" }; static const Bool requiredKeys[NUM_ATT_IN_KEYS] = { swTRUE, swTRUE, swTRUE, swFALSE, swFALSE, swTRUE, swTRUE, @@ -302,7 +303,7 @@ static void nc_read_atts( // set_hasKey() does not produce errors, only warnings possible /* Check to see if the line number contains a double or integer value */ - doIntConv = (Bool) (keyID >= 23 && keyID <= 25); + doIntConv = (Bool) (keyID >= 23 && keyID <= 27); doDoubleConv = (Bool) ((keyID >= 9 && keyID <= 11) || (keyID >= 15 && keyID <= 17) || (keyID >= 21 && keyID <= 22)); @@ -462,6 +463,9 @@ static void nc_read_atts( case 26: SW_netCDF->baseCalendarYear = inBufintRes; break; + case 27: + SW_netCDF->deflateLevel = inBufintRes; + break; case KEY_NOT_FOUND: default: LogError( @@ -560,6 +564,35 @@ static void get_2d_output_key( } } +/* +@brief Write a dummy value to a newly created netCDF file so that +the first write does not occur during the first simulation run; +this function should only be called when there is no deflate +activated + +@param[in] ncFileID Identifier of the open netCDF file to write to +@param[in] varType Type of the first variable being written to the file +@param[in] varID Identifier of the variable to write to +*/ +static void writeDummyVal(int ncFileID, int varType, int varID) { + size_t start[MAX_NUM_DIMS] = {0}; + size_t count[MAX_NUM_DIMS] = {1, 1, 1, 1, 1}; + double doubleFill[] = {NC_FILL_DOUBLE}; + unsigned char byteFill[] = {(unsigned char) NC_FILL_BYTE}; + + switch (varType) { + case NC_DOUBLE: + 
nc_put_vara_double(ncFileID, varID, start, count, &doubleFill[0]); + break; + case NC_BYTE: + nc_put_vara_ubyte(ncFileID, varID, start, count, &byteFill[0]); + break; + default: + /* No other types should be expected */ + break; + } +} + /** @brief Get a dimension identifier within a given netCDF @@ -1268,6 +1301,9 @@ static void create_netCDF_dim( @param[in] ncFileID Domain netCDF file ID @param[in] varType The type in which the new variable will be @param[in] numDims Number of dimensions the new variable will hold +@param[in] chunkSizes Custom chunk sizes for the variable being created +@param[in] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information on warnings and errors */ static void create_netCDF_var( @@ -1277,13 +1313,14 @@ static void create_netCDF_var( const int *ncFileID, int varType, int numDims, + size_t chunkSizes[], + int deflateLevel, LOG_INFO *LogInfo ) { // Deflate information int shuffle = 1; // 0 or 1 int deflate = 1; // 0 or 1 - int level = 5; // 0 to 9 if (nc_def_var(*ncFileID, varName, varType, numDims, dimIDs, varID) != NC_NOERR) { @@ -1297,12 +1334,21 @@ static void create_netCDF_var( return; // Exit prematurely due to error } + if (!isnull(chunkSizes)) { + if (nc_def_var_chunking(*ncFileID, *varID, NC_CHUNKED, chunkSizes) != + NC_NOERR) { + LogError(LogInfo, LOGERROR, "Just here.", varName); + return; // Exit prematurely due to error + } + } + // Do not compress the CRS variables - if (strcmp(varName, "crs_geogsc") != 0 && + if (deflateLevel > 0 && strcmp(varName, "crs_geogsc") != 0 && strcmp(varName, "crs_projsc") != 0 && varType != NC_STRING) { - if (nc_def_var_deflate(*ncFileID, *varID, shuffle, deflate, level) != - NC_NOERR) { + if (nc_def_var_deflate( + *ncFileID, *varID, shuffle, deflate, deflateLevel + ) != NC_NOERR) { LogError( LogInfo, LOGERROR, @@ -1751,6 +1797,8 @@ static void fill_domain_netCDF_vals( @param[in] primCRSIsGeo Specifies if the current CRS type is 
geographic @param[in] domType Type of domain in which simulations are running (gridcell/sites) +@param[asdf] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information on warnings and errors */ static void fill_domain_netCDF_domain( @@ -1761,6 +1809,7 @@ static void fill_domain_netCDF_domain( int nDomainDims, Bool primCRSIsGeo, const char *domType, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -1786,6 +1835,8 @@ static void fill_domain_netCDF_domain( &domFileID, NC_UINT, nDomainDims, + NULL, + deflateLevel, LogInfo ); @@ -1822,6 +1873,8 @@ static void fill_domain_netCDF_domain( variable (lat or y) @param[out] XVarID Variable identifier of the X-axis horizontal coordinate variable (lon or x) +@param[asdf] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information on warnings and errors */ static void fill_domain_netCDF_s( @@ -1831,6 +1884,7 @@ static void fill_domain_netCDF_s( int *sVarID, int *YVarID, int *XVarID, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -1884,6 +1938,8 @@ static void fill_domain_netCDF_s( domFileID, NC_DOUBLE, 1, + NULL, + deflateLevel, LogInfo ); if (LogInfo->stopRun) { @@ -1949,6 +2005,8 @@ static void fill_domain_netCDF_s( bounds variable (lat_bnds or y_bnds) @param[out] XVarID Variable identifier of the X-axis horizontal coordinate bounds variable (lon_bnds or x_bnds) +@param[asdf] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information on warnings and errors */ static void fill_domain_netCDF_xy( @@ -1960,6 +2018,7 @@ static void fill_domain_netCDF_xy( int *XVarID, int *YBndsID, int *XBndsID, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -2042,6 +2101,8 @@ static void fill_domain_netCDF_xy( domFileID, NC_DOUBLE, 1, + NULL, + deflateLevel, LogInfo ); if (LogInfo->stopRun) { @@ -2058,6 +2119,8 @@ static void fill_domain_netCDF_xy( domFileID, NC_DOUBLE, 2, + NULL, + deflateLevel, 
LogInfo ); if (LogInfo->stopRun) { @@ -2439,8 +2502,9 @@ static void fill_netCDF_with_invariants( int proj_id = 0; const char *fx = "fx"; + /* Do not deflate crs_geogsc */ create_netCDF_var( - &geo_id, "crs_geogsc", NULL, ncFileID, NC_BYTE, 0, LogInfo + &geo_id, "crs_geogsc", NULL, ncFileID, NC_BYTE, 0, NULL, 0, LogInfo ); if (LogInfo->stopRun) { return; // Exit function prematurely due to error @@ -2460,8 +2524,10 @@ static void fill_netCDF_with_invariants( // Projected CRS variable/attributes if (!SW_netCDF->primary_crs_is_geographic) { + + /* Do not deflate crs_projsc */ create_netCDF_var( - &proj_id, "crs_projsc", NULL, ncFileID, NC_BYTE, 0, LogInfo + &proj_id, "crs_projsc", NULL, ncFileID, NC_BYTE, 0, NULL, 0, LogInfo ); if (LogInfo->stopRun) { return; // Exit function prematurely due to error @@ -2524,6 +2590,8 @@ the variable "time_bnds" and fills the variable "time" @param[in,out] startTime Start number of days when dealing with years between netCDF files @param[in] pd Current output netCDF period +@param[adf] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information dealing with logfile output */ static void create_time_vars( @@ -2534,6 +2602,7 @@ static void create_time_vars( unsigned int startYr, double *startTime, OutPeriod pd, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -2550,7 +2619,15 @@ static void create_time_vars( create_netCDF_var( - &bndsID, "time_bnds", dimIDs, &ncFileID, NC_DOUBLE, numBnds, LogInfo + &bndsID, + "time_bnds", + dimIDs, + &ncFileID, + NC_DOUBLE, + numBnds, + NULL, + deflateLevel, + LogInfo ); if (LogInfo->stopRun) { return; // Exit function prematurely due to error @@ -2649,6 +2726,8 @@ the variable "vertical_bnds" and fills the variable "vertical" run within domain have identical soil layer depths (though potentially variable number of soil layers) @param[in] lyrDepths Depths of soil layers (cm) +@param[iasdf] deflateLevel Level of deflation that will be used for the 
created +variable @param[out] LogInfo Holds information dealing with logfile output */ static void create_vert_vars( @@ -2658,6 +2737,7 @@ static void create_vert_vars( int dimVarID, Bool hasConsistentSoilLayerDepths, const double lyrDepths[], + int deflateLevel, LOG_INFO *LogInfo ) { @@ -2676,6 +2756,8 @@ static void create_vert_vars( &ncFileID, NC_DOUBLE, numBnds, + NULL, + deflateLevel, LogInfo ); if (LogInfo->stopRun) { @@ -2747,6 +2829,8 @@ if needed years between netCDF files @param[in] startYr Start year of the simulation @param[in] pd Current output netCDF period +@param[adsf] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information dealing with logfile output */ static void fill_dimVar( @@ -2760,6 +2844,7 @@ static void fill_dimVar( int dimNum, unsigned int startYr, OutPeriod pd, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -2791,6 +2876,7 @@ static void fill_dimVar( varID, hasConsistentSoilLayerDepths, lyrDepths, + deflateLevel, LogInfo ); } @@ -2804,6 +2890,7 @@ static void fill_dimVar( startYr, startTime, pd, + deflateLevel, LogInfo ); } @@ -2832,6 +2919,8 @@ and fill the variable with the respective information @param[in] baseCalendarYear First year of the entire simulation @param[in] startYr Start year of the simulation @param[in] pd Current output netCDF period +@param[in] deflateLevel Level of deflation that will be used for the created +variable @param[out] LogInfo Holds information dealing with logfile output */ static void create_output_dimVar( @@ -2845,6 +2934,7 @@ static void create_output_dimVar( unsigned int baseCalendarYear, unsigned int startYr, OutPeriod pd, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -2904,7 +2994,15 @@ static void create_output_dimVar( dimIDs[0] = *dimID; create_netCDF_var( - &varID, name, dimIDs, &ncFileID, varType, numDims, LogInfo + &varID, + name, + dimIDs, + &ncFileID, + varType, + numDims, + NULL, + deflateLevel, + LogInfo ); if (LogInfo->stopRun) { return; 
// Exit function prematurely due to error @@ -2921,6 +3019,7 @@ static void create_output_dimVar( dimNum, startYr, pd, + deflateLevel, LogInfo ); @@ -2982,6 +3081,8 @@ and writing attributes @param[in] baseCalendarYear First year of the entire simulation @param[in] startYr Start year of the simulation @param[in] pd Current output netCDF period +@param[in] deflateLevel Level of deflation that will be used for the created +variable @param[in,out] LogInfo Holds information dealing with logfile output */ static void create_full_var( @@ -3001,6 +3102,7 @@ static void create_full_var( unsigned int baseCalendarYear, unsigned int startYr, OutPeriod pd, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -3018,7 +3120,8 @@ static void create_full_var( char *dimVarName; size_t timeVertVegVals[] = {timeSize, vertSize, pftSize}; unsigned int numTimeVertVegVals = 3; - unsigned int varVal; + unsigned int varVal = 0; + size_t chunkSizes[MAX_NUM_DIMS] = {1, 1, 1, 1, 1}; for (index = 0; index < numConstDims; index++) { @@ -3048,6 +3151,7 @@ static void create_full_var( baseCalendarYear, startYr, pd, + deflateLevel, LogInfo ); } else { @@ -3063,9 +3167,26 @@ static void create_full_var( } } + for (index = numConstDims; index < MAX_NUM_DIMS; index++) { + if (index - numConstDims < 3) { + varVal = timeVertVegVals[index - numConstDims]; + + if (varVal > 0) { + chunkSizes[index] = varVal; + } + } + } create_netCDF_var( - &varID, varName, dimIDs, ncFileID, newVarType, dimArrSize, LogInfo + &varID, + varName, + dimIDs, + ncFileID, + newVarType, + dimArrSize, + chunkSizes, + deflateLevel, + LogInfo ); if (LogInfo->stopRun) { return; // Exit function prematurely due to error @@ -3079,6 +3200,13 @@ static void create_full_var( return; // Exit function prematurely due to error } } + + if (deflateLevel == 0) { + /* Write a dummy value so that the first write is not in the sim loop; + otherwise, the first simulation loop takes an order of magnitude + longer than following simulations */ + 
writeDummyVal(*ncFileID, newVarType, varID); + } } /** @@ -3213,6 +3341,8 @@ SW_OUTNPERIODS). @param[in] baseCalendarYear First year of the entire simulation @param[in,out] startTime Start number of days when dealing with years between netCDF files (returns updated value) +@param[d] deflateLevel Level of deflation that will be used for the created +variable @param[in] LogInfo Holds information on warnings and errors */ static void create_output_file( @@ -3231,6 +3361,7 @@ static void create_output_file( unsigned int startYr, int baseCalendarYear, double *startTime, + int deflateLevel, LOG_INFO *LogInfo ) { @@ -3307,6 +3438,7 @@ static void create_output_file( baseCalendarYear, startYr, pd, + deflateLevel, LogInfo ); @@ -3878,6 +4010,7 @@ void SW_NC_create_output_files( rangeStart, baseCalendarYear, &startTime[pd], + SW_Domain->netCDFInfo.deflateLevel, LogInfo ); if (LogInfo->stopRun) { @@ -4388,7 +4521,14 @@ void SW_NC_create_domain_template( // Create s dimension/domain variables fill_domain_netCDF_s( - SW_Domain, domFileID, &sDimID, &sVarID, &YVarID, &XVarID, LogInfo + SW_Domain, + domFileID, + &sDimID, + &sVarID, + &YVarID, + &XVarID, + SW_Domain->netCDFInfo.deflateLevel, + LogInfo ); if (LogInfo->stopRun) { @@ -4410,6 +4550,7 @@ void SW_NC_create_domain_template( &XVarID, &YBndsID, &XBndsID, + SW_Domain->netCDFInfo.deflateLevel, LogInfo ); @@ -4431,6 +4572,7 @@ void SW_NC_create_domain_template( nDomainDims, SW_netCDF->primary_crs_is_geographic, SW_Domain->DomainType, + SW_Domain->netCDFInfo.deflateLevel, LogInfo ); if (LogInfo->stopRun) { @@ -4604,6 +4746,7 @@ void SW_NC_create_progress(SW_DOMAIN *SW_Domain, LOG_INFO *LogInfo) { 0, 0, 0, + SW_Domain->netCDFInfo.deflateLevel, LogInfo ); @@ -5326,6 +5469,7 @@ void SW_NC_init_ptrs(SW_NETCDF *SW_netCDF) { SW_netCDF->crs_projsc.standard_parallel[1] = NAN; SW_netCDF->strideOutYears = -1; + SW_netCDF->deflateLevel = 0; for (index = 0; index < numAllocVars; index++) { *allocArr[index] = NULL; diff --git 
a/tests/example/Input_nc/desc_nc.in b/tests/example/Input_nc/desc_nc.in index af6397b12..2ed314c96 100644 --- a/tests/example/Input_nc/desc_nc.in +++ b/tests/example/Input_nc/desc_nc.in @@ -52,3 +52,4 @@ proj_false_northing 0 #------ Output file information strideOutYears 20 # Specifies the number of years to put into a single output netCDF -- may be 1, X (e.g., 10), or "Inf" (all years in one file), defaults to "Inf" if this line is not found baseCalendarYear 1980 +deflateLevel 0 # Specifies how much output files will be deflated - may be 0 (off) or 1-9 (intensity of deflation), defaults to 0 if this line is not found \ No newline at end of file From 2e109a88e90e6a2a81d6e044455e26f46723eb7b Mon Sep 17 00:00:00 2001 From: Nicholas Persley Date: Wed, 7 Aug 2024 17:07:47 -0400 Subject: [PATCH 2/2] Correct missed documentation and error message from development --- src/SW_netCDF.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/SW_netCDF.c b/src/SW_netCDF.c index 52d17f01a..93dd7c939 100644 --- a/src/SW_netCDF.c +++ b/src/SW_netCDF.c @@ -1337,7 +1337,14 @@ static void create_netCDF_var( if (!isnull(chunkSizes)) { if (nc_def_var_chunking(*ncFileID, *varID, NC_CHUNKED, chunkSizes) != NC_NOERR) { - LogError(LogInfo, LOGERROR, "Just here.", varName); + + LogError( + LogInfo, + LOGERROR, + "Could not chunk variable '%s' when creating it in " + "output netCDF.", + varName + ); return; // Exit prematurely due to error } } @@ -1797,7 +1804,7 @@ static void fill_domain_netCDF_vals( @param[in] primCRSIsGeo Specifies if the current CRS type is geographic @param[in] domType Type of domain in which simulations are running (gridcell/sites) -@param[asdf] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created variable @param[out] LogInfo Holds information on warnings and errors */ @@ -1873,7 +1880,7 @@ static void fill_domain_netCDF_domain( variable 
(lat or y) @param[out] XVarID Variable identifier of the X-axis horizontal coordinate variable (lon or x) -@param[asdf] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created variable @param[out] LogInfo Holds information on warnings and errors */ @@ -2005,7 +2012,7 @@ static void fill_domain_netCDF_s( bounds variable (lat_bnds or y_bnds) @param[out] XVarID Variable identifier of the X-axis horizontal coordinate bounds variable (lon_bnds or x_bnds) -@param[asdf] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created variable @param[out] LogInfo Holds information on warnings and errors */ @@ -2590,7 +2597,7 @@ the variable "time_bnds" and fills the variable "time" @param[in,out] startTime Start number of days when dealing with years between netCDF files @param[in] pd Current output netCDF period -@param[adf] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created variable @param[out] LogInfo Holds information dealing with logfile output */ @@ -2726,7 +2733,7 @@ the variable "vertical_bnds" and fills the variable "vertical" run within domain have identical soil layer depths (though potentially variable number of soil layers) @param[in] lyrDepths Depths of soil layers (cm) -@param[iasdf] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created variable @param[out] LogInfo Holds information dealing with logfile output */ @@ -2829,7 +2836,7 @@ if needed years between netCDF files @param[in] startYr Start year of the simulation @param[in] pd Current output netCDF period -@param[adsf] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created 
variable @param[out] LogInfo Holds information dealing with logfile output */ @@ -3341,7 +3348,7 @@ SW_OUTNPERIODS). @param[in] baseCalendarYear First year of the entire simulation @param[in,out] startTime Start number of days when dealing with years between netCDF files (returns updated value) -@param[d] deflateLevel Level of deflation that will be used for the created +@param[in] deflateLevel Level of deflation that will be used for the created variable @param[in] LogInfo Holds information on warnings and errors */