Disable automatic cache with dask #1024
Merged

Changes from all commits (16 commits, all by gimperiale):
f3d74e8  Disabled auto-caching dask arrays when pickling and when invoking the…
26a6997  Minor tweaks
03fbdd1  Simplified Dataset.copy() and Dataset.compute()
b04167c  Minor cleanup
ca94cc7  Cleaned up dask test
91b8084  Merge branch 'master' into no_dask_resolve
e46b61f  Integrate no_dask_resolve with dask_broadcast branches
90743f0  Don't chunk coords
30fbd8f  Merge branch 'master' into no_dask_resolve
ac8e0cb  Added performance warning to release notes
e7f600c  Fix bug that caused dask array to be computed and then discarded when…
2d85d90  Merged branch master into no_dask_resolve
28f1a6e  Merged branch master into no_dask_resolve
25569df  Merge branch 'master'
27b0916  Eagerly cache IndexVariables only
376200a  Load IndexVariable.data into memory in init
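
As a quick orientation before the diff, here is a minimal usage sketch of the behaviour these commits describe, inferred from the tests added below; the example dataset, variable names, and chunk sizes are made up for illustration and are not part of the PR:

```python
import numpy as np
import xarray as xr

# Illustrative dask-backed dataset; the contents are arbitrary.
ds = xr.Dataset(
    {'foo': (('x',), np.arange(4.0))},
    coords={'x': [10, 20, 30, 40]},
).chunk({'x': 2})

# Index variables are loaded into memory eagerly; data variables stay lazy.
assert ds['x'].variable._in_memory
assert not ds['foo'].variable._in_memory

# compute() returns a fully loaded copy and no longer caches the values
# on the original dataset as a side effect.
computed = ds.compute()
assert not ds['foo'].variable._in_memory
assert computed['foo'].variable._in_memory
```

This is the same contract that the updated `assert_loads` and the new `test_dataset_compute` below assert for every backend in `DatasetIOTestCases`.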
@@ -128,8 +128,12 @@ def assert_loads(vars=None):
         if vars is None:
             vars = expected
         with self.roundtrip(expected) as actual:
-            for v in actual.variables.values():
-                self.assertFalse(v._in_memory)
+            for k, v in actual.variables.items():
+                # IndexVariables are eagerly loaded into memory
+                if k in actual.dims:
+                    self.assertTrue(v._in_memory)
+                else:
+                    self.assertFalse(v._in_memory)
             yield actual
             for k, v in actual.variables.items():
                 if k in vars:
@@ -152,6 +156,31 @@ def assert_loads(vars=None):
         actual = ds.load()
         self.assertDatasetAllClose(expected, actual)

+    def test_dataset_compute(self):
+        expected = create_test_data()
+
+        with self.roundtrip(expected) as actual:
+            # Test Dataset.compute()
+            for k, v in actual.variables.items():
+                # IndexVariables are eagerly cached
+                if k in actual.dims:
+                    self.assertTrue(v._in_memory)

[Review comment on the line above: "This would be slightly simpler just as …"]

+                else:
+                    self.assertFalse(v._in_memory)
+
+            computed = actual.compute()
+
+            for k, v in actual.variables.items():
+                if k in actual.dims:
+                    self.assertTrue(v._in_memory)
+                else:
+                    self.assertFalse(v._in_memory)
+            for v in computed.variables.values():
+                self.assertTrue(v._in_memory)
+
+            self.assertDatasetAllClose(expected, actual)
+            self.assertDatasetAllClose(expected, computed)
+
     def test_roundtrip_None_variable(self):
         expected = Dataset({None: (('x', 'y'), [[0, 1], [2, 3]])})
         with self.roundtrip(expected) as actual:
@@ -233,18 +262,6 @@ def test_roundtrip_coordinates(self):
         with self.roundtrip(expected) as actual:
             self.assertDatasetIdentical(expected, actual)

-        expected = original.copy()
-        expected.attrs['coordinates'] = 'something random'
-        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
-            with self.roundtrip(expected):
-                pass
-
-        expected = original.copy(deep=True)
-        expected['foo'].attrs['coordinates'] = 'something random'
-        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
-            with self.roundtrip(expected):
-                pass
-
     def test_roundtrip_boolean_dtype(self):
         original = create_boolean_data()
         self.assertEqual(original['x'].dtype, 'bool')
@@ -875,7 +892,26 @@ def test_read_byte_attrs_as_unicode(self):
 @requires_dask
 @requires_scipy
 @requires_netCDF4
-class DaskTest(TestCase):
+class DaskTest(TestCase, DatasetIOTestCases):
+    @contextlib.contextmanager
+    def create_store(self):
+        yield Dataset()
+
+    @contextlib.contextmanager
+    def roundtrip(self, data, save_kwargs={}, open_kwargs={}):
+        yield data.chunk()
+
+    def test_roundtrip_datetime_data(self):
+        # Override method in DatasetIOTestCases - remove not applicable save_kwds
+        times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT'])
+        expected = Dataset({'t': ('t', times), 't0': times[0]})
+        with self.roundtrip(expected) as actual:
+            self.assertDatasetIdentical(expected, actual)
+
+    def test_write_store(self):
+        # Override method in DatasetIOTestCases - not applicable to dask
+        pass
+
     def test_open_mfdataset(self):
         original = Dataset({'foo': ('x', np.random.randn(10))})
         with create_tmp_file() as tmp1:
@@ -995,6 +1031,15 @@ def test_deterministic_names(self):
             self.assertIn(tmp, dask_name)
         self.assertEqual(original_names, repeat_names)

+    def test_dataarray_compute(self):
+        # Test DataArray.compute() on dask backend.
+        # The test for Dataset.compute() is already in DatasetIOTestCases;
+        # however dask is the only tested backend which supports DataArrays
+        actual = DataArray([1,2]).chunk()
+        computed = actual.compute()
+        self.assertFalse(actual._in_memory)
+        self.assertTrue(computed._in_memory)
+        self.assertDataArrayAllClose(actual, computed)
+
 @requires_scipy_or_netCDF4
 @requires_pydap
Review comment: Should this branch not also apply to `dask_array_type`?
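
The branch being discussed is not part of the excerpt above. As a rough illustration of the kind of type check the comment alludes to, here is a hypothetical sketch; the helper name and the non-dask fallback are invented, and `dask_array_type` is rebuilt inline rather than imported from xarray's internals:

```python
import dask.array as da
import numpy as np

# xarray of this era kept a tuple like this as
# xarray.core.pycompat.dask_array_type; reproduced here to stay self-contained.
dask_array_type = (da.Array,)

def prepare_data(data):
    # Hypothetical helper, not taken from the PR.
    if isinstance(data, dask_array_type):
        # dask arrays are already lazy: leave them untouched so nothing
        # is computed or cached implicitly.
        return data
    # Invented fallback for the non-dask path.
    return np.asarray(data)
```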
Review comment: In fact, if you manually create a `Variable` with a dask array you'll get a `LazilyIndexedArray` at this point. Should this not also be kept unchanged?
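
The situation this comment describes, constructing a Variable directly from a dask array rather than through a backend store, looks roughly like the sketch below; exactly what `_data` ends up wrapped in at that point is the question being raised:

```python
import dask.array as da
import xarray as xr

# Build a Variable straight from a dask array, bypassing any file backend.
v = xr.Variable('x', da.arange(6, chunks=3))

print(type(v._data))  # the wrapped data object the comment refers to
print(v._in_memory)   # False: the dask array has not been computed
```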