Remove code related to deprecated "autoclose" option #501

Merged: 2 commits, Dec 5, 2018
Changes from 1 commit

13 changes: 0 additions & 13 deletions docs/config/input.rst
@@ -35,15 +35,6 @@ these data will be read in::
     # names of ocean and sea ice meshes (e.g. oEC60to30, oQU240, oRRS30to10, etc.)
     mpasMeshName = mesh
 
-    # The system has a limit to how many files can be open at one time. By
-    # default, xarray attempts to open all files in a data set simultaneously.
-    # A new option allows files to be automatically closed as a data set is being
-    # read to prevent hitting this limit. Here, you can set what fraction of the
-    # system limit of open files an analysis task is allowed to use. Note: In the
-    # future when multiple tasks can run simultaneously, the system file limit will
-    # first be divided among the tasks before applying this fraction.
-    autocloseFileLimitFraction = 0.5
-
     # Large datasets can encounter a memory error. Specification of a maximum
     # chunk size `maxChunkSize` can be helpful to prevent the memory error. The
     # current maximum chunk size assumes approximately 64GB of ram and large files
@@ -181,12 +172,8 @@ multi-file data sets using xarray in favor of concatenating these data sets
 together using NCO tools, there are some legacy options that users can modify
 if they experience errors related to dask::
 
-    autocloseFileLimitFraction = 0.5
     maxChunkSize = 10000
 
-If an error occurs relating to too many open files, you may wish to reduce
-``autocloseFileLimitFraction`` to a smaller fraction.
-
 If an out of memory error occurs, it may first be worth reducing the number
 of parallel tasks running (see :ref:`config_execute`) but if the error is
 clearly related to dask (which might be the case, for example, if the error
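
For context on what the removed option did: it compared the number of files in a data set against a fraction of the process's soft limit on open file descriptors, and enabled xarray's autoclose behavior only when the data set was large enough to risk hitting that limit. A minimal sketch of that heuristic, assuming a Unix-like system where Python's `resource` module is available; `needs_autoclose` and `limitFraction` are illustrative names, not identifiers from this repository:

```python
import resource


def needs_autoclose(fileNames, limitFraction=0.5):
    # The "soft" limit is the one that actually triggers "too many open
    # files" errors at runtime; the hard limit only bounds how far the
    # soft limit can be raised.
    softLimit, _hardLimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    # Request autoclose only when the data set could consume more than
    # the allowed fraction of the soft limit.
    return len(fileNames) > softLimit * limitFraction
```

The heuristic is being removed because the `autoclose` keyword was deprecated upstream in xarray, which now manages its own cache of open file handles, making the workaround unnecessary.
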
9 changes: 0 additions & 9 deletions mpas_analysis/config.default
@@ -85,15 +85,6 @@ seaIceStreamsFileName = streams.seaice
 # names of ocean and sea ice meshes (e.g. oEC60to30, oQU240, oRRS30to10, etc.)
 mpasMeshName = mesh
 
-# The system has a limit to how many files can be open at one time. By
-# default, xarray attempts to open all files in a data set simultaneously.
-# A new option allows files to be automatically closed as a data set is being
-# read to prevent hitting this limit. Here, you can set what fraction of the
-# system limit of open files an analysis task is allowed to use. Note: In the
-# future when multiple tasks can run simultaneously, the system file limit will
-# first be divided among the tasks before applying this fraction.
-autocloseFileLimitFraction = 0.5
-
 # Large datasets can encounter a memory error. Specification of a maximum
 # chunk size `maxChunkSize` can be helpful to prevent the memory error. The
 # current maximum chunk size assumes approximately 64GB of ram and large files
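
The surviving `maxChunkSize` option is about dask chunking, which is what the memory guidance in the comment above refers to. A rough sketch of how a chunk-size cap maps onto xarray's chunking, assuming an MPAS-style `nCells` dimension; the file and dimension names here are illustrative only, not taken from this PR:

```python
import xarray

# Analogous to the maxChunkSize option above: smaller chunks lower peak
# memory use at the cost of scheduling more dask tasks.
maxChunkSize = 10000

# Open lazily with dask, capping the chunk length along one dimension.
ds = xarray.open_dataset('timeSeries.0002-01-01.nc',
                         chunks={'nCells': maxChunkSize})
```
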
32 changes: 3 additions & 29 deletions mpas_analysis/shared/generalized_reader/generalized_reader.py
@@ -145,36 +145,13 @@ def open_multifile_dataset(fileNames, calendar, config,
     kwargs = {'decode_times': False,
               'concat_dim': 'Time'}
 
-    autocloseFileLimitFraction = config.getfloat('input',
-                                                 'autocloseFileLimitFraction')
 
-    # get the number of files that can be open at the same time. We want the
-    # "soft" limit because we'll get a crash if we exceed it.
-    softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
 
-    # use autoclose if we will use more than autocloseFileLimitFraction (50%
-    # by default) of the soft limit of open files
-    autoclose = len(fileNames) > softLimit*autocloseFileLimitFraction
 
-    try:
-        ds = xarray.open_mfdataset(fileNames,
-                                   preprocess=preprocess_partial,
-                                   autoclose=autoclose, **kwargs)
-    except TypeError as e:
-        if 'autoclose' in str(e):
-            if autoclose:
-                # This indicates that xarray version doesn't support autoclose
-                print('Warning: open_multifile_dataset is trying to use '
-                      'autoclose=True but\n'
-                      'it appears your xarray version doesn\'t support this '
-                      'argument. Will\n'
-                      'try again without autoclose argument.')
 
-            ds = xarray.open_mfdataset(fileNames,
-                                       preprocess=preprocess_partial,
-                                       **kwargs)
-        else:
-            raise e
+    ds = xarray.open_mfdataset(fileNames,
+                               preprocess=preprocess_partial,
+                               **kwargs)
 
     ds = mpas_xarray.remove_repeated_time_index(ds)
 
@@ -201,9 +178,6 @@ def open_multifile_dataset(fileNames, calendar, config,
 
     ds = mpas_xarray.process_chunking(ds, chunking)
 
-    # private record of autoclose use
-    ds.attrs['_autoclose'] = int(autoclose)
-
     return ds  # }}}
 
 
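
After this change, the reader reduces to a single `xarray.open_mfdataset` call with a preprocessing hook. A self-contained sketch of that simplified path; `_preprocess` is a hypothetical stand-in for the `preprocess_partial` used in the real code, the glob and variable name are illustrative, and newer xarray releases additionally require `combine='nested'` when `concat_dim` is given:

```python
import xarray


def _preprocess(ds):
    # Hypothetical stand-in for preprocess_partial: subset each input
    # file to the variables of interest before concatenation.
    return ds[['timeMonthly_avg_tThreshMLD']]


# Concatenate all matching files along the 'Time' dimension.
ds = xarray.open_mfdataset('timeSeries.*.nc',
                           preprocess=_preprocess,
                           combine='nested',
                           concat_dim='Time',
                           decode_times=False)
```

Dropping the try/except fallback is safe once the supported xarray versions no longer accept `autoclose` at all: there is no longer a code path where passing it could succeed.
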
5 changes: 1 addition & 4 deletions mpas_analysis/test/test_climatology.py
@@ -48,12 +48,9 @@ def tearDown(self):
         # Remove the directory after the test
         shutil.rmtree(self.test_dir)
 
-    def setup_config(self, autocloseFileLimitFraction=0.5,
-                     maxChunkSize=10000):
+    def setup_config(self, maxChunkSize=10000):
         config = MpasAnalysisConfigParser()
         config.add_section('input')
-        config.set('input', 'autocloseFileLimitFraction',
-                   str(autocloseFileLimitFraction))
         config.set('input', 'maxChunkSize', str(maxChunkSize))
         config.set('input', 'mpasMeshName', 'QU240')
 
37 changes: 1 addition & 36 deletions mpas_analysis/test/test_generalized_reader.py
@@ -29,12 +29,9 @@
 @pytest.mark.usefixtures("loaddatadir")
 class TestGeneralizedReader(TestCase):
 
-    def setup_config(self, autocloseFileLimitFraction=0.5,
-                     maxChunkSize=10000):
+    def setup_config(self, maxChunkSize=10000):
         config = MpasAnalysisConfigParser()
         config.add_section('input')
-        config.set('input', 'autocloseFileLimitFraction',
-                   str(autocloseFileLimitFraction))
         config.set('input', 'maxChunkSize', str(maxChunkSize))
         return config
 
@@ -136,36 +133,4 @@ def test_start_end(self):
             endDate='0005-03-01')
         self.assertEqual(len(ds.Time), 1)
 
-    def test_open_process_climatology(self):
-        fileNames = [str(self.datadir.join('timeSeries.0002-{:02d}-01.nc'.format(month)))
-                     for month in [1, 2, 3]]
-        calendar = 'gregorian_noleap'
-        variableMap = {'mld': ['timeMonthly_avg_tThreshMLD'],
-                       'Time': [['xtime_startMonthly', 'xtime_endMonthly']]}
-        annualClimatologies = []
-        for frac, autoclose in zip([1.0, 0.], [False, True]):
-            # effectively, test with autoclose=False and autoclose=True
-            config = self.setup_config(autocloseFileLimitFraction=frac)
-            ds = open_multifile_dataset(
-                fileNames=fileNames,
-                calendar=calendar,
-                config=config,
-                timeVariableName='Time',
-                variableList=['mld'],
-                variableMap=variableMap)
-
-            # note, the asserts for autoclose below are only guaranteed
-            # to work immediately following call to open_multifile_dataset
-            assert hasattr(ds, '_autoclose'), \
-                '`autoclose` not defined for dataset'
-            if hasattr(ds, '_autoclose'):
-                assert ds._autoclose == int(autoclose), \
-                    ('`autoclose` used for dataset is inconsistent '
-                     'with expected test value.')
-
-            annualClimatologies.append(ds.mean(dim='Time'))
-
-        self.assertArrayEqual(annualClimatologies[0].mld.values,
-                              annualClimatologies[1].mld.values)
-
 # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python