Skip to content

Commit

Permalink
Fixed dask.optimize on datasets
Browse files Browse the repository at this point in the history
Another attempt to fix pydata#3698. The issue with my previous fix is that we hit
`Variable._dask_finalize` in both `dask.optimize` and `dask.persist`. We
want to cull the unnecessary tasks (`test_persist_Dataset`), but
only in the persist case, not in the optimize case (`test_optimize`).
  • Loading branch information
TomAugspurger committed Sep 18, 2020
1 parent 902f1fc commit e8106bb
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
11 changes: 10 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,10 +777,19 @@ def _dask_postcompute(results, info, *args):
@staticmethod
def _dask_postpersist(dsk, info, *args):
variables = {}
# postpersist is called in both dask.optimize and dask.persist
# When persisting, we want to filter out unrelated keys for
# each Variable's task graph.
is_persist = len(dsk) == len(info)
for is_dask, k, v in info:
if is_dask:
func, args2 = v
result = func(dsk, *args2)
if is_persist:
name = args2[1][0]
dsk2 = {k: v for k, v in dsk.items() if k[0] == name}
else:
dsk2 = dsk
result = func(dsk2, *args2)
else:
result = v
variables[k] = result
Expand Down
3 changes: 0 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,6 @@ def __dask_postpersist__(self):

@staticmethod
def _dask_finalize(results, array_func, array_args, dims, attrs, encoding):
if isinstance(results, dict): # persist case
name = array_args[0]
results = {k: v for k, v in results.items() if k[0] == name}
data = array_func(results, *array_args)
return Variable(dims, data, attrs=attrs, encoding=encoding)

Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,3 +1607,11 @@ def test_more_transforms_pass_lazy_array_equiv(map_da, map_ds):
assert_equal(map_da._from_temp_dataset(map_da._to_temp_dataset()), map_da)
assert_equal(map_da.astype(map_da.dtype), map_da)
assert_equal(map_da.transpose("y", "x", transpose_coords=False).cxy, map_da.cxy)


def test_optimize():
    """Check that a DataArray passed through ``dask.optimize`` still computes.

    Regression test for https://github.com/pydata/xarray/issues/3698
    """
    # Build a chunked dask array, wrap it in a DataArray, then rechunk it.
    ones = dask.array.ones((10, 4), chunks=(5, 2))
    rechunked = xr.DataArray(ones).chunk(5)

    # dask.optimize returns a tuple; unpacking insists on exactly one result.
    [optimized] = dask.optimize(rechunked)

    # The test passes as long as computing the optimized object does not raise.
    optimized.compute()

0 comments on commit e8106bb

Please sign in to comment.