
Source encoding always set when opening datasets #2626

Merged (12 commits) on Dec 30, 2018
doc/whats-new.rst (3 additions, 0 deletions)

@@ -61,6 +61,9 @@ Enhancements
- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the
``loffset`` kwarg just like Pandas.
By `Deepak Cherian <https://github.com/dcherian>`_
- Datasets are now guaranteed to have a ``'source'`` encoding, so the source
Contributor (suggested change):
- Datasets are now guaranteed to have a ``'source'`` encoding, so the source
- Datasets are now guaranteed to have an ``encoding.source`` attribute, so the source

Member:

You can't do attribute lookups in encoding, so encoding.source isn't valid.

Contributor:

Oh right. So encoding['source'] then?

Member:

Yep, encoding['source'] would work
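The distinction the reviewers are drawing can be shown in plain Python: a Dataset's ``encoding`` is an ordinary dict, so item lookup works but attribute lookup does not. The dict below is a stand-in for a real Dataset's ``encoding``, not xarray itself:

```python
# `encoding` on a Dataset behaves like a plain dict (stand-in shown here),
# so item lookup works but attribute lookup does not.
encoding = {'source': '/tmp/example.nc'}

print(encoding['source'])  # item lookup works: /tmp/example.nc

try:
    encoding.source        # attribute lookup on a dict fails
except AttributeError:
    print("dicts have no '.source' attribute")
```

This is why the whats-new entry was ultimately worded around ``encoding['source']`` rather than ``encoding.source``.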

file name is always stored (:issue:`2550`).
By `Tom Nicholas <http://github.com/TomNicholas>`_.
- 0d slices of ndarrays are now obtained directly through indexing, rather than
extracting and wrapping a scalar, avoiding unnecessary copying. By `Daniel
Wennberg <https://github.com/danielwe>`_.
11 changes: 9 additions & 2 deletions xarray/backends/api.py (9 additions, 2 deletions)

@@ -300,6 +300,7 @@ def maybe_decode_store(store, lock=False):

if isinstance(filename_or_obj, backends.AbstractDataStore):
store = filename_or_obj
ds = maybe_decode_store(store)
elif isinstance(filename_or_obj, basestring):

if (isinstance(filename_or_obj, bytes) and
@@ -340,15 +341,21 @@ def maybe_decode_store(store, lock=False):
% engine)

with close_on_error(store):
return maybe_decode_store(store)
ds = maybe_decode_store(store)
else:
if engine is not None and engine != 'scipy':
raise ValueError('can only read file-like objects with '
"default engine or engine='scipy'")
# assume filename_or_obj is a file-like object
store = backends.ScipyDataStore(filename_or_obj)
ds = maybe_decode_store(store)

return maybe_decode_store(store)
# Ensure source filename always stored in dataset object (GH issue #2550)
if 'source' not in ds.encoding:
if isinstance(filename_or_obj, basestring):
ds.encoding['source'] = filename_or_obj

return ds
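The shape of the refactor above can be sketched in isolation: each branch now assigns ``ds`` instead of returning early, so a single post-processing step can record the source filename. This sketch uses a hypothetical ``_decode`` helper and a minimal ``FakeDataset`` as stand-ins; it illustrates the control flow only and is not xarray's actual implementation:

```python
class FakeDataset:
    """Minimal stand-in for xarray.Dataset: only carries an encoding dict."""
    def __init__(self):
        self.encoding = {}


def _decode(obj):
    # Hypothetical stand-in for maybe_decode_store(store).
    return FakeDataset()


def open_dataset_sketch(filename_or_obj):
    # Every branch produces `ds` instead of returning immediately...
    if isinstance(filename_or_obj, str):
        ds = _decode(filename_or_obj)   # filename / path branch
    else:
        ds = _decode(filename_or_obj)   # store or file-like branch

    # ...so the source filename can be recorded in one place (GH issue #2550).
    if 'source' not in ds.encoding and isinstance(filename_or_obj, str):
        ds.encoding['source'] = filename_or_obj
    return ds


ds = open_dataset_sketch('/data/example.nc')
print(ds.encoding['source'])  # /data/example.nc
```

Collapsing the three early returns into a shared exit point is what lets the ``'source'`` guarantee live in one place rather than being duplicated per backend branch.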


def open_dataarray(filename_or_obj, group=None, decode_cf=True,
xarray/tests/test_backends.py (11 additions, 0 deletions)

@@ -3423,3 +3423,14 @@ def test_no_warning_from_dask_effective_get():
ds = Dataset()
ds.to_netcdf(tmpfile)
assert len(record) == 0


@requires_scipy_or_netCDF4
def test_source_encoding_always_present():
# Test for GH issue #2550.
rnddata = np.random.randn(10)
original = Dataset({'foo': ('x', rnddata)})
with create_tmp_file() as tmp:
original.to_netcdf(tmp)
with open_dataset(tmp) as ds:
assert ds.encoding['source'] == tmp