
Add option to choose mfdataset attributes source. #3498

Merged Jan 11, 2020 (15 commits). Changes from 4 commits.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -69,6 +69,9 @@ New Features
invoked. (:issue:`3378`, :pull:`3446`)
By `Deepak Cherian <https://github.com/dcherian>`_ and
`Guido Imperiale <https://github.com/crusaderky>`_.
- Add ``master_file`` option in :py:func:`~xarray.open_mfdataset` to choose the
  source file for global attributes in a multi-file dataset (:issue:`2382`,
  :pull:`3498`) by `Julien Seguinot <https://github.com/juseg>`_.

Bug fixes
~~~~~~~~~
16 changes: 13 additions & 3 deletions xarray/backends/api.py
@@ -718,6 +718,7 @@ def open_mfdataset(
autoclose=None,
parallel=False,
join="outer",
master_file=0,
**kwargs,
):
"""Open multiple files as a single dataset.
@@ -729,8 +730,8 @@
``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
``combine='nested'`` in future. Requires dask to be installed. See documentation for
details on dask [1]. Attributes from the first dataset file are used for the
combined dataset.
details on dask [1]. Global attributes from the ``master_file`` are used
for the combined dataset.

Parameters
----------
@@ -825,6 +826,10 @@
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
master_file : int or str, optional
Contributor:
This is netCDF4's documentation for ``master_file``:

    file to use as "master file", defining all the variables with an aggregation dimension and all global attributes.

Let's make it clear that, unlike netCDF4, we are only using this for attributes.

Contributor Author:
Do you suggest using a different keyword, maybe ``attrs_file``? Or just clarifying the difference in the docs? I don't mind. @dcherian Thanks for the review!

Contributor:
I was initially thinking of just adding a line to the docstring, but we should think about renaming this to something like ``attrs_from``?

Contributor Author:
So I've renamed it to ``attrs_file`` to avoid confusion with netCDF4. Thanks for pointing that out. I am open to any name as long as the option is here.

Contributor Author:
@dcherian can we mark this as resolved? ``attrs_file`` now only accepts a file name (see the other conversation below).

Index or path of the file used to read global attributes from.
For instance, use -1 to read the history attribute from the last file.
Note that wildcard matches are sorted by filename.
**kwargs : optional
Additional arguments passed on to :py:func:`xarray.open_dataset`.

@@ -959,7 +964,12 @@
raise

combined._file_obj = _MultiFileCloser(file_objs)
combined.attrs = datasets[0].attrs

# read global attributes from the master file path or index
if isinstance(master_file, str):
master_file = paths.index(master_file)
combined.attrs = datasets[master_file].attrs

return combined
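The attribute-selection step above is small enough to sketch on its own. The helper below is a hypothetical stand-in (not part of xarray) that mirrors the path-or-index resolution of ``master_file``, using plain dicts in place of datasets:

```python
# Hypothetical stand-in for the attrs-selection step in open_mfdataset:
# ``master_file`` may be an integer index into the input paths, or one
# of the paths itself.
def select_master_attrs(paths, attrs_list, master_file=0):
    """Return the global attributes of the chosen master file."""
    if isinstance(master_file, str):
        # A path is translated to its position in the input list.
        master_file = paths.index(master_file)
    return attrs_list[master_file]


paths = ["part1.nc", "part2.nc"]
attrs_list = [{"test1": "foo"}, {"test2": "bar"}]

print(select_master_attrs(paths, attrs_list, master_file=-1))
print(select_master_attrs(paths, attrs_list, master_file="part2.nc"))
```

Both calls select the second file's attributes: the first by negative index, the second by path, matching the two test cases added below.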


36 changes: 36 additions & 0 deletions xarray/tests/test_backends.py
@@ -2799,6 +2799,42 @@ def test_attrs_mfdataset(self):
with raises_regex(AttributeError, "no attribute"):
actual.test2

def test_open_mfdataset_master_file_index(self):
original = Dataset({"foo": ("x", np.random.randn(10))})
with create_tmp_files(2) as (tmp1, tmp2):
ds1 = original.isel(x=slice(5))
ds2 = original.isel(x=slice(5, 10))
ds1.attrs["test1"] = "foo"
ds2.attrs["test2"] = "bar"
ds1.to_netcdf(tmp1)
ds2.to_netcdf(tmp2)
with open_mfdataset(
[tmp1, tmp2], concat_dim="x", combine="nested", master_file=-1
) as actual:
# attributes are inherited from the master file
assert actual.test2 == ds2.test2
# attributes from ds1 are not retained, e.g.,
with raises_regex(AttributeError, "no attribute"):
actual.test1

def test_open_mfdataset_master_file_path(self):
original = Dataset({"foo": ("x", np.random.randn(10))})
with create_tmp_files(2) as (tmp1, tmp2):
ds1 = original.isel(x=slice(5))
ds2 = original.isel(x=slice(5, 10))
ds1.attrs["test1"] = "foo"
ds2.attrs["test2"] = "bar"
ds1.to_netcdf(tmp1)
ds2.to_netcdf(tmp2)
with open_mfdataset(
[tmp1, tmp2], concat_dim="x", combine="nested", master_file=tmp2
) as actual:
# attributes are inherited from the master file
assert actual.test2 == ds2.test2
# attributes from ds1 are not retained, e.g.,
with raises_regex(AttributeError, "no attribute"):
actual.test1

def test_open_mfdataset_auto_combine(self):
original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)})
with create_tmp_file() as tmp1: