Skip to content

Commit

Permalink
Merge pull request #1732 from slevisconsulting/new_soiltex_slevis
Browse files Browse the repository at this point in the history
New soiltex for ctsm5.2.mksurfdata, plus re-import all other new raw datasets and start test simulations
  • Loading branch information
slevis-lmwg authored Oct 4, 2022
2 parents 382e0cc + ea5dbae commit 1f84b84
Show file tree
Hide file tree
Showing 16 changed files with 761 additions and 603 deletions.
18 changes: 16 additions & 2 deletions bld/CLMBuildNamelist.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2645,6 +2645,13 @@ sub setup_logic_do_transient_lakes {

my $var = 'do_transient_lakes';

# Start by assuming a default value of '.true.'. Then check a number of
# conditions under which do_transient_lakes cannot be true. Under these
# conditions: (1) set default value to '.false.'; (2) make sure that the
# value is indeed false (e.g., that the user didn't try to set it to true).

my $default_val = ".true.";

# cannot_be_true will be set to a non-empty string in any case where
# do_transient_lakes should not be true; if it turns out that
# do_transient_lakes IS true in any of these cases, a fatal error will be
Expand All @@ -2668,7 +2675,7 @@ sub setup_logic_do_transient_lakes {
# Note that, if the variable cannot be true, we don't call add_default
# - so that we don't clutter up the namelist with variables that don't
# matter for this case
add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, $var);
add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, $var, val=>$default_val);
}

# Make sure the value is false when it needs to be false - i.e., that the
Expand Down Expand Up @@ -2708,6 +2715,13 @@ sub setup_logic_do_transient_urban {

my $var = 'do_transient_urban';

# Start by assuming a default value of '.true.'. Then check a number of
# conditions under which do_transient_urban cannot be true. Under these
# conditions: (1) set default value to '.false.'; (2) make sure that the
# value is indeed false (e.g., that the user didn't try to set it to true).

my $default_val = ".true.";

# cannot_be_true will be set to a non-empty string in any case where
# do_transient_urban should not be true; if it turns out that
# do_transient_urban IS true in any of these cases, a fatal error will be
Expand All @@ -2731,7 +2745,7 @@ sub setup_logic_do_transient_urban {
# Note that, if the variable cannot be true, we don't call add_default
# - so that we don't clutter up the namelist with variables that don't
# matter for this case
add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, $var);
add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, $var, val=>$default_val);
}

# Make sure the value is false when it needs to be false - i.e., that the
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
do_transient_lakes = .true.

! This file was created with the following command:
! ncap2 -s 'PCT_LAKE=array(0.0,0.0,PCT_CROP); PCT_LAKE={0.,50.,25.,25.,25.,25.}; HASLAKE=array(1.,1.,AREA); PCT_CROP=array(0.0,0.0,PCT_LAKE); PCT_CROP={0.,25.,12.,12.,12.,12.}' landuse.timeseries_1x1_smallvilleIA_hist_78pfts_simyr1850-1855_c160127.nc landuse.timeseries_1x1_smallvilleIA_hist_78pfts_simyr1850-1855_dynLakes_c200928.nc
! ncap2 -s 'PCT_LAKE=array(0.0,0.0,PCT_CROP); PCT_LAKE={0.,50.,25.,25.,25.,25.}; PCT_LAKE_MAX=array(1.,1.,AREA); PCT_CROP=array(0.0,0.0,PCT_LAKE); PCT_CROP={0.,25.,12.,12.,12.,12.}' landuse.timeseries_1x1_smallvilleIA_hist_78pfts_simyr1850-1855_c160127.nc landuse.timeseries_1x1_smallvilleIA_hist_78pfts_simyr1850-1855_dynLakes_c200928.nc
! Key points are that lake area starts as 0, increases after the first year, then decreases after the second year.
! PCT_CROP is also changed so that PCT_LAKE + PCT_CROP <= 100. (Here, PCT_CROP increases and decreases at the same time as PCT_LAKE in order to exercise the simultaneous increase or decrease of two landunits, but that isn't a critical part of this test.)
! Note that the use of this file means that this testmod can only be used with the 1x1_smallvilleIA grid.
Expand Down
6 changes: 3 additions & 3 deletions src/main/clm_initializeMod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ module clm_initializeMod
use clm_varctl , only : use_lch4, use_cn, use_cndv, use_c13, use_c14, use_fates
use clm_varctl , only : use_soil_moisture_streams
use clm_instur , only : wt_lunit, urban_valid, wt_nat_patch, wt_cft, fert_cft
use clm_instur , only : irrig_method, wt_glc_mec, topo_glc_mec, haslake, pct_urban_max
use clm_instur , only : irrig_method, wt_glc_mec, topo_glc_mec, pct_lake_max, pct_urban_max
use perf_mod , only : t_startf, t_stopf
use readParamsMod , only : readParameters
use ncdio_pio , only : file_desc_t
Expand Down Expand Up @@ -218,7 +218,7 @@ subroutine initialize2(ni,nj)
allocate (irrig_method (begg:endg, cft_lb:cft_ub ))
allocate (wt_glc_mec (begg:endg, maxpatch_glc ))
allocate (topo_glc_mec (begg:endg, maxpatch_glc ))
allocate (haslake (begg:endg ))
allocate (pct_lake_max (begg:endg ))
allocate (pct_urban_max(begg:endg, numurbl ))

! Read list of Patches and their corresponding parameter values
Expand Down Expand Up @@ -295,7 +295,7 @@ subroutine initialize2(ni,nj)
! Some things are kept until the end of initialize2; urban_valid is kept through the
! end of the run for error checking, pct_urban_max is kept through the end of the run
! for reweighting in subgridWeights.
deallocate (wt_lunit, wt_cft, wt_glc_mec, haslake)
deallocate (wt_lunit, wt_cft, wt_glc_mec, pct_lake_max)

! Determine processor bounds and clumps for this processor
call get_proc_bounds(bounds_proc)
Expand Down
2 changes: 1 addition & 1 deletion src/main/clm_varsur.F90
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ module clm_instur
real(r8), pointer :: topo_glc_mec(:,:)

! maximum percent lake in each grid cell over the landuse timeseries
! (> 0 means a lake landunit should be initialised in that grid cell)
logical , pointer :: haslake(:)
real(r8), pointer :: pct_lake_max(:)

! whether we have urban to initialize in each grid cell
! (second dimension goes 1:numurbl)
Expand Down
10 changes: 5 additions & 5 deletions src/main/subgridMod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -568,11 +568,11 @@ function lake_landunit_exists(gi) result(exists)
!
! !DESCRIPTION:
! Returns true if a land unit for lakes should be created in memory
! which is defined for gridcells which will grow lake, given by haslake
! which is defined for gridcells which will grow lake, given by pct_lake_max
!
! !USES:
use dynSubgridControlMod , only : get_do_transient_lakes
use clm_instur , only : haslake
use clm_instur , only : pct_lake_max
!
! !ARGUMENTS:
logical :: exists ! function result
Expand All @@ -584,10 +584,10 @@ function lake_landunit_exists(gi) result(exists)
!-----------------------------------------------------------------------

if (get_do_transient_lakes()) then
! To support dynamic landunits, we initialise a lake land unit in each grid cell in which there are lakes.
! This is defined by the haslake variable
! To support dynamic landunits, we initialise a lake land unit in
! each grid cell in which there are lakes as defined by pct_lake_max

if (haslake(gi)) then
if (pct_lake_max(gi) > 0._r8) then
exists = .true.
else
exists = .false.
Expand Down
6 changes: 3 additions & 3 deletions src/main/surfrdMod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,7 @@ subroutine surfrd_lakemask(begg, endg)
! Necessary for the initialisation of the lake land units
!
! !USES:
use clm_instur , only : haslake
use clm_instur , only : pct_lake_max
use dynSubgridControlMod , only : get_flanduse_timeseries
use clm_varctl , only : fname_len
use fileutils , only : getfil
Expand Down Expand Up @@ -836,9 +836,9 @@ subroutine surfrd_lakemask(begg, endg)
call ncd_pio_openfile (ncid_dynuse, trim(locfn), 0)

! read the lakemask
call ncd_io(ncid=ncid_dynuse, varname='HASLAKE' , flag='read', data=haslake, &
call ncd_io(ncid=ncid_dynuse, varname='PCT_LAKE_MAX' , flag='read', data=pct_lake_max, &
dim1name=grlnd, readvar=readvar)
if (.not. readvar) call endrun( msg=' ERROR: HASLAKE is not on landuse.timeseries file'//errMsg(sourcefile, __LINE__))
if (.not. readvar) call endrun( msg=' ERROR: PCT_LAKE_MAX is not on landuse.timeseries file'//errMsg(sourcefile, __LINE__))

! close landuse_timeseries file again
call ncd_pio_closefile(ncid_dynuse)
Expand Down
35 changes: 18 additions & 17 deletions tools/mksurfdata_esmf/README
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@ Build Requirements:
===================

mksurfdata_esmf is a distributed memory parallel program (using Message Passing
Interface -- MPI) that utilizes
both ESMF (Earth System Modelling Framework) for regridding as well as Parallel
I/O (PIO) and NetCDF output. As
such libraries must be built for the following:
Interface -- MPI) that utilizes both ESMF (Earth System Modelling Framework)
for regridding as well as PIO (Parallel I/O) and NetCDF output. As
such, libraries must be built for the following:

1) MPI
2) NetCDF
Expand All @@ -25,35 +24,36 @@ such libraries must be built for the following:

In addition for the build: python, bash-shell, CMake and GNU-Make are required

These libraries need to be built such that they can all work together in the same executable. Hence, the above
order may be required in building them.
These libraries need to be built such that they can all work together in the
same executable. Hence, the above order may be required in building them.

=========================================
Use cime to manage the build requirements
=========================================

For users working on cime machines you can use the build
script to build the tool for you. On other machines you'll need to do a port to cime
and tell how to build for that machine. That's talked about in the cime documentation.
For users working on cime machines you can use the build script to build the
tool. On other machines you'll need to do a port to cime and tell cime how to
build for that machine. That process is described in the cime documentation.
And you'll have to make some modifications to the build script.

https://github.com/ESMCI/cime/wiki/Porting-Overview

Machines that already run CTSM or CESM have been ported to cime. So if you can run the model
on your machine you will be able to build the tool there.
Machines that already run CTSM or CESM have been ported to cime. So if you can
run the model on your machine, you will be able to build the tool there.

To get a list of the machines that have been ported to cime:

cd ../../cime/scripts # assumes we are in tools/mksurfdata_esmf
./query_config --machines

NOTE:
In addition to having a port to cime, the machine also needs to have PIO built and able
to be referenced with the env variable PIO which will need to be in the porting instructions
for the machine. Currently an independent PIO library is not available on cime ported machines.
In addition to having a port to cime, the machine also needs to have PIO built
and able to be referenced with the env variable PIO which will need to be in
the porting instructions for the machine. Currently an independent PIO library
is not available on cime ported machines.

=======================
building the executable (working in tools/mksurfdata_esmf)
Building the executable (working in tools/mksurfdata_esmf)
=======================

> ./gen_mksurfdata_build.sh # For machines with a cime build
Expand All @@ -63,7 +63,7 @@ building the executable (working in tools/mksurfdata_esmf)
# a default value does get set for other machines.

=======================
running for a single submission:
Running for a single submission:
=======================
# to generate your target namelist:
> ./gen_mksurfdata_namelist.py --help
Expand All @@ -83,7 +83,7 @@ running for a single submission:
# Read note about regional grids below.

=======================
running for the generation of multiple datasets
Running for the generation of multiple datasets
=======================
# Notes:
# - gen_mksurfdata_jobscript_multi.py runs ./gen_mksurfdata_namelist.py for you
Expand All @@ -102,6 +102,7 @@ slevis HAS RUN THESE CASES and HAS LISTED ISSUES ENCOUNTERED
------------------------------------------------------------
REMEMBER TO compare against existing fsurdat files in
/glade/p/cesmdata/cseg/inputdata/lnd/clm2/surfdata_map/release-clm5.0.18
0) New 30-sec raw data for soil texture fails. Try requesting more mem.
1) Soil color & texture and ag fire peak month outputs too high in .log
TODO? Change frac_o from always 1.
2) Pct lake has chged in the .log bc the old diagnostic omitted mask_i frac_o
Expand Down
51 changes: 31 additions & 20 deletions tools/mksurfdata_esmf/gen_mksurfdata_namelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,19 @@ def get_parser():
[default: %(default)s]
""",
action="store_true",
dest="hres_flag",
dest="hres_pft",
default=False,
)
parser.add_argument(
"--hires_soitex",
help="""
If you want to use the high-resolution soil texture dataset rather
than the default lower resolution dataset.
(Low resolution is 5x5min, high resolution 30-second)
[default: %(default)s]
""",
action="store_true",
dest="hres_soitex",
default=False,
)
parser.add_argument(
Expand Down Expand Up @@ -230,19 +242,6 @@ def get_parser():
dest="potveg_flag",
default=False,
)
parser.add_argument(
"--merge_gis",
help="""
If you want to use the glacier dataset that merges in
the Greenland Ice Sheet data that CISM uses (typically
used only if consistency with CISM is important)
[default: %(default)s]
""",
action="store",
dest="merge_gis",
choices=["on","off"],
default="off",
)
return parser

def main ():
Expand All @@ -267,8 +266,8 @@ def main ():
glc_flag = args.glc_flag
potveg = args.potveg_flag
glc_nec = args.glc_nec
merge_gis = args.merge_gis
if args.hres_flag:

if args.hres_pft:
if (start_year == 1850 and end_year == 1850) or \
(start_year == 2005 and end_year == 2005):
hires_pft = 'on'
Expand All @@ -279,6 +278,13 @@ def main ():
else:
hires_pft = 'off'

if args.hres_soitex:
hires_soitex = 'on'
else:
hires_soitex = 'off'

verbose = args.verbose

if force_model_mesh_file is not None:
# open mesh_file to read element_count and, if available, orig_grid_dims
mesh_file = netCDF4.Dataset(force_model_mesh_file, 'r')
Expand Down Expand Up @@ -369,10 +375,10 @@ def main ():

# create attribute list for parsing xml file
attribute_list = {'hires_pft':hires_pft,
'hires_soitex':hires_soitex,
'pft_years':pft_years,
'pft_years_ssp':pft_years_ssp,
'ssp_rcp':ssp_rcp,
'mergeGIS':merge_gis,
'res':res}

# create dictionary for raw data files names
Expand Down Expand Up @@ -431,9 +437,10 @@ def main ():
print('WARNING: run ./download_input_data to try TO ' \
'OBTAIN MISSING FILES')
_must_run_download_input_data = True
elif 'urban_properties' in rawdata_files[child1.tag]:
# Time-slice cases pull urban_properties from the transient
# urban_properties data files
elif 'urban_properties' in rawdata_files[child1.tag] or \
'lake' in rawdata_files[child1.tag]:
# Time-slice cases pull urban_properties and %lake from the
# corresponding transient files
rawdata_files[child1.tag] = rawdata_files[child1.tag]. \
replace("%y",str(start_year))

Expand All @@ -455,6 +462,10 @@ def main ():
new_key = f"{child1.tag}_urban"
rawdata_files[new_key] = os.path.join(input_path, item.text)

if item.tag == 'lookup_filename':
new_key = f"{child1.tag}_lookup"
rawdata_files[new_key] = os.path.join(input_path, item.text)

# determine output mesh
xml_path = os.path.join(tool_path, '../../ccs_config/component_grids_nuopc.xml')
tree2 = ET.parse(xml_path)
Expand Down
Loading

0 comments on commit 1f84b84

Please sign in to comment.