Remove code related to deprecated "autoclose" option #501

Merged: 2 commits, Dec 5, 2018
Changes from 1 commit

13 changes: 0 additions & 13 deletions docs/config/input.rst
@@ -35,15 +35,6 @@ these data will be read in::
     # names of ocean and sea ice meshes (e.g. oEC60to30, oQU240, oRRS30to10, etc.)
     mpasMeshName = mesh
 
-    # The system has a limit to how many files can be open at one time. By
-    # default, xarray attempts to open all files in a data set simultaneously.
-    # A new option allows files to be automatically closed as a data set is being
-    # read to prevent hitting this limit. Here, you can set what fraction of the
-    # system limit of open files an analysis task is allowed to use. Note: In the
-    # future when multiple tasks can run simultaneously, the system file limit will
-    # first be divided among the tasks before applying this fraction.
-    autocloseFileLimitFraction = 0.5
-
     # Large datasets can encounter a memory error. Specification of a maximum
     # chunk size `maxChunkSize` can be helpful to prevent the memory error. The
     # current maximum chunk size assumes approximately 64GB of ram and large files
@@ -181,12 +172,8 @@ multi-file data sets using xarray in favor of concatenating these data sets
 together using NCO tools, there are some legacy options that users can modify
 if they experience errors related to dask::
 
-    autocloseFileLimitFraction = 0.5
     maxChunkSize = 10000
 
-If an error occurs relating to too many open files, you may wish to reduce
-``autocloseFileLimitFraction`` to a smaller fraction.
-
 If an out of memory error occurs, it may first be worth reducing the number
 of parallel tasks running (see :ref:`config_execute`) but if the error is
 clearly related to dask (which might be the case, for example, if the error
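
For context on what the removed option did: it compared the number of files in a data set against a fraction of the process's soft limit on open file descriptors, and enabled xarray's autoclose behavior only when the data set was large enough to risk hitting that limit. A minimal sketch of that heuristic, assuming a Unix-like system where Python's `resource` module is available; `needs_autoclose` and `limitFraction` are illustrative names, not identifiers from this repository:

```python
import resource


def needs_autoclose(fileNames, limitFraction=0.5):
    # The "soft" limit is the one that actually triggers "too many open
    # files" errors at runtime; the hard limit only bounds how far the
    # soft limit can be raised.
    softLimit, _hardLimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    # Request autoclose only when the data set could consume more than
    # the allowed fraction of the soft limit.
    return len(fileNames) > softLimit * limitFraction
```

The heuristic is being removed because the `autoclose` keyword was deprecated upstream in xarray, which now manages its own cache of open file handles, making the workaround unnecessary.
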
9 changes: 0 additions & 9 deletions mpas_analysis/config.default
@@ -85,15 +85,6 @@ seaIceStreamsFileName = streams.seaice
 # names of ocean and sea ice meshes (e.g. oEC60to30, oQU240, oRRS30to10, etc.)
 mpasMeshName = mesh
 
-# The system has a limit to how many files can be open at one time. By
-# default, xarray attempts to open all files in a data set simultaneously.
-# A new option allows files to be automatically closed as a data set is being
-# read to prevent hitting this limit. Here, you can set what fraction of the
-# system limit of open files an analysis task is allowed to use. Note: In the
-# future when multiple tasks can run simultaneously, the system file limit will
-# first be divided among the tasks before applying this fraction.
-autocloseFileLimitFraction = 0.5
-
 # Large datasets can encounter a memory error. Specification of a maximum
 # chunk size `maxChunkSize` can be helpful to prevent the memory error. The
 # current maximum chunk size assumes approximately 64GB of ram and large files
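
The surviving `maxChunkSize` option is about dask chunking, which is what the memory guidance in the comment above refers to. A rough sketch of how a chunk-size cap maps onto xarray's chunking, assuming an MPAS-style `nCells` dimension; the file and dimension names here are illustrative only, not taken from this PR:

```python
import xarray

# Analogous to the maxChunkSize option above: smaller chunks lower peak
# memory use at the cost of scheduling more dask tasks.
maxChunkSize = 10000

# Open lazily with dask, capping the chunk length along one dimension.
ds = xarray.open_dataset('timeSeries.0002-01-01.nc',
                         chunks={'nCells': maxChunkSize})
```
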
32 changes: 3 additions & 29 deletions mpas_analysis/shared/generalized_reader/generalized_reader.py
@@ -145,36 +145,13 @@ def open_multifile_dataset(fileNames, calendar, config,
     kwargs = {'decode_times': False,
               'concat_dim': 'Time'}
 
-    autocloseFileLimitFraction = config.getfloat('input',
-                                                 'autocloseFileLimitFraction')
 
-    # get the number of files that can be open at the same time. We want the
-    # "soft" limit because we'll get a crash if we exceed it.
-    softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
 
-    # use autoclose if we will use more than autocloseFileLimitFraction (50%
-    # by default) of the soft limit of open files
-    autoclose = len(fileNames) > softLimit*autocloseFileLimitFraction
 
-    try:
-        ds = xarray.open_mfdataset(fileNames,
-                                   preprocess=preprocess_partial,
-                                   autoclose=autoclose, **kwargs)
-    except TypeError as e:
-        if 'autoclose' in str(e):
-            if autoclose:
-                # This indicates that xarray version doesn't support autoclose
-                print('Warning: open_multifile_dataset is trying to use '
-                      'autoclose=True but\n'
-                      'it appears your xarray version doesn\'t support this '
-                      'argument. Will\n'
-                      'try again without autoclose argument.')
 
-            ds = xarray.open_mfdataset(fileNames,
-                                       preprocess=preprocess_partial,
-                                       **kwargs)
-        else:
-            raise e
+    ds = xarray.open_mfdataset(fileNames,
+                               preprocess=preprocess_partial,
+                               **kwargs)
 
     ds = mpas_xarray.remove_repeated_time_index(ds)
 
@@ -201,9 +178,6 @@ def open_multifile_dataset(fileNames, calendar, config,
 
     ds = mpas_xarray.process_chunking(ds, chunking)
 
-    # private record of autoclose use
-    ds.attrs['_autoclose'] = int(autoclose)
-
     return ds  # }}}
 
 
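
After this change, the reader reduces to a single `xarray.open_mfdataset` call with a preprocessing hook. A self-contained sketch of that simplified path; `_preprocess` is a hypothetical stand-in for the `preprocess_partial` used in the real code, the glob and variable name are illustrative, and newer xarray releases additionally require `combine='nested'` when `concat_dim` is given:

```python
import xarray


def _preprocess(ds):
    # Hypothetical stand-in for preprocess_partial: subset each input
    # file to the variables of interest before concatenation.
    return ds[['timeMonthly_avg_tThreshMLD']]


# Concatenate all matching files along the 'Time' dimension.
ds = xarray.open_mfdataset('timeSeries.*.nc',
                           preprocess=_preprocess,
                           combine='nested',
                           concat_dim='Time',
                           decode_times=False)
```

Dropping the try/except fallback is safe once the supported xarray versions no longer accept `autoclose` at all: there is no longer a code path where passing it could succeed.
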
5 changes: 1 addition & 4 deletions mpas_analysis/test/test_climatology.py
@@ -48,12 +48,9 @@ def tearDown(self):
         # Remove the directory after the test
         shutil.rmtree(self.test_dir)
 
-    def setup_config(self, autocloseFileLimitFraction=0.5,
-                     maxChunkSize=10000):
+    def setup_config(self, maxChunkSize=10000):
         config = MpasAnalysisConfigParser()
         config.add_section('input')
-        config.set('input', 'autocloseFileLimitFraction',
-                   str(autocloseFileLimitFraction))
         config.set('input', 'maxChunkSize', str(maxChunkSize))
         config.set('input', 'mpasMeshName', 'QU240')
 
37 changes: 1 addition & 36 deletions mpas_analysis/test/test_generalized_reader.py
@@ -29,12 +29,9 @@
 @pytest.mark.usefixtures("loaddatadir")
 class TestGeneralizedReader(TestCase):
 
-    def setup_config(self, autocloseFileLimitFraction=0.5,
-                     maxChunkSize=10000):
+    def setup_config(self, maxChunkSize=10000):
         config = MpasAnalysisConfigParser()
         config.add_section('input')
-        config.set('input', 'autocloseFileLimitFraction',
-                   str(autocloseFileLimitFraction))
         config.set('input', 'maxChunkSize', str(maxChunkSize))
         return config
 
@@ -136,36 +133,4 @@ def test_start_end(self):
             endDate='0005-03-01')
         self.assertEqual(len(ds.Time), 1)
 
-    def test_open_process_climatology(self):
-        fileNames = [str(self.datadir.join('timeSeries.0002-{:02d}-01.nc'.format(month)))
-                     for month in [1, 2, 3]]
-        calendar = 'gregorian_noleap'
-        variableMap = {'mld': ['timeMonthly_avg_tThreshMLD'],
-                       'Time': [['xtime_startMonthly', 'xtime_endMonthly']]}
-        annualClimatologies = []
-        for frac, autoclose in zip([1.0, 0.], [False, True]):
-            # effectively, test with autoclose=False and autoclose=True
-            config = self.setup_config(autocloseFileLimitFraction=frac)
-            ds = open_multifile_dataset(
-                fileNames=fileNames,
-                calendar=calendar,
-                config=config,
-                timeVariableName='Time',
-                variableList=['mld'],
-                variableMap=variableMap)
-
-            # note, the asserts for autoclose below are only guaranteed
-            # to work immediately following call to open_multifile_dataset
-            assert hasattr(ds, '_autoclose'), \
-                '`autoclose` not defined for dataset'
-            if hasattr(ds, '_autoclose'):
-                assert ds._autoclose == int(autoclose), \
-                    ('`autoclose` used for dataset is inconsistent '
-                     'with expected test value.')
-
-            annualClimatologies.append(ds.mean(dim='Time'))
-
-        self.assertArrayEqual(annualClimatologies[0].mld.values,
-                              annualClimatologies[1].mld.values)
-
 # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python