From a3598665d479e63b26cf3f96591038e5ae6f8744 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 7 Aug 2019 10:19:33 +0100 Subject: [PATCH 1/5] pyupgrade (manually vetted and tweaked) --- versioneer.py | 23 +++++++++--------- xarray/_version.py | 10 ++++---- xarray/backends/api.py | 4 ++-- xarray/backends/netCDF4_.py | 12 ++++++---- xarray/backends/netcdf3.py | 9 ++++---- xarray/backends/pseudonetcdf_.py | 10 ++++---- xarray/backends/pynio_.py | 10 ++++---- xarray/backends/zarr.py | 9 ++++---- xarray/coding/cftime_offsets.py | 2 +- xarray/coding/times.py | 2 +- xarray/conventions.py | 2 +- xarray/convert.py | 6 ++--- xarray/core/alignment.py | 8 +++---- xarray/core/combine.py | 13 ++++++----- xarray/core/dataarray.py | 2 +- xarray/core/formatting.py | 14 ++++++----- xarray/core/groupby.py | 36 ++++++++++++++++++----------- xarray/core/indexing.py | 2 +- xarray/core/ops.py | 6 +++-- xarray/core/variable.py | 18 +++++++++------ xarray/plot/plot.py | 2 +- xarray/tests/test_backends.py | 22 ++++++++++-------- xarray/tests/test_coding_strings.py | 4 ++-- xarray/tests/test_coding_times.py | 2 +- xarray/tests/test_concat.py | 11 ++++++--- xarray/tests/test_conventions.py | 6 ++--- xarray/tests/test_dataarray.py | 6 ++--- xarray/tests/test_dataset.py | 34 ++++++++++++++++----------- xarray/tests/test_plot.py | 8 +++---- xarray/tutorial.py | 2 +- 30 files changed, 168 insertions(+), 127 deletions(-) diff --git a/versioneer.py b/versioneer.py index 577743023ca..e369108b439 100644 --- a/versioneer.py +++ b/versioneer.py @@ -398,7 +398,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, stderr=(subprocess.PIPE if hide_stderr else None)) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -421,7 +421,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, p.returncode -LONG_VERSION_PY['git'] = ''' +LONG_VERSION_PY['git'] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -968,7 +968,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -992,11 +992,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1005,7 +1005,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1148,7 +1148,7 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1206,7 +1206,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) @@ -1702,8 +1702,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) @@ -1728,7 +1727,7 @@ def do_setup(): try: with open(ipy, "r") as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1752,7 +1751,7 @@ def do_setup(): if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so diff --git a/xarray/_version.py b/xarray/_version.py index df4ee95ade4..442e56a04b0 100644 --- a/xarray/_version.py +++ b/xarray/_version.py @@ -81,7 +81,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, stderr=(subprocess.PIPE if hide_stderr else None)) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -153,7 +153,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -177,11 +177,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -190,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 21d91a886af..8e8db8e42de 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -742,7 +742,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', paths = [str(p) if isinstance(p, Path) else p for p in paths] if not paths: - raise IOError('no files to open') + raise OSError('no files to open') # If combine='by_coords' then this is unnecessary, but quick. # If combine='nested' then this creates a flat list which is easier to @@ -1039,7 +1039,7 @@ def save_mfdataset(datasets, paths, mode='w', format=None, groups=None, if groups is None: groups = [None] * len(datasets) - if len(set([len(datasets), len(paths), len(groups)])) > 1: + if len({len(datasets), len(paths), len(groups)}) > 1: raise ValueError('must supply lists of the same length for the ' 'datasets, paths and groups arguments to ' 'save_mfdataset') diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 962cba4012d..a93fba65d18 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -138,7 +138,7 @@ def _netcdf4_create_group(dataset, name): def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): - if group in set([None, '', '/']): + if group in {None, '', '/'}: # use the root group return ds else: @@ -155,7 +155,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): ds = create_group(ds, key) else: # wrap error to provide slightly more helpful message - raise IOError('group not found: %s' % key, e) + raise OSError('group not found: %s' % key, e) return ds @@ -195,9 +195,11 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, encoding = variable.encoding.copy() - safe_to_drop = set(['source', 'original_shape']) - valid_encodings = set(['zlib', 'complevel', 'fletcher32', 'contiguous', - 'chunksizes', 'shuffle', '_FillValue', 'dtype']) + safe_to_drop = {'source', 'original_shape'} + valid_encodings = { + 'zlib', 'complevel', 'fletcher32', 'contiguous', + 'chunksizes', 'shuffle', '_FillValue', 'dtype' + } if lsd_okay: valid_encodings.add('least_significant_digit') if h5py_okay: diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 7f5c8d4b1a7..4985e51f689 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -11,9 +11,10 @@ # The following are reserved names in CDL and may not be used as names of # variables, dimension, attributes -_reserved_names = set(['byte', 'char', 'short', 'ushort', 'int', 'uint', - 'int64', 'uint64', 'float' 'real', 'double', 'bool', - 'string']) +_reserved_names = { + 'byte', 'char', 'short', 'ushort', 'int', 'uint', 'int64', 'uint64', + 'float' 'real', 'double', 'bool', 'string' +} # These data-types aren't supported by netCDF3, so they are automatically # coerced instead as indicated by the "coerce_nc3_dtype" function @@ -108,4 +109,4 @@ def is_valid_nc3_name(s): ('/' not in s) and (s[-1] != ' ') and (_isalnumMUTF8(s[0]) or (s[0] == '_')) and - all((_isalnumMUTF8(c) or c in _specialchars for c in s))) + all(_isalnumMUTF8(c) or c in _specialchars for c in s)) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 7a3f8a771e6..92f1a575d45 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -75,17 +75,17 @@ def 
get_variables(self):
                                 for k, v in self.ds.variables.items())
 
     def get_attrs(self):
-        return Frozen(dict([(k, getattr(self.ds, k))
-                            for k in self.ds.ncattrs()]))
+        return Frozen({k: getattr(self.ds, k) for k in self.ds.ncattrs()})
 
     def get_dimensions(self):
         return Frozen(self.ds.dimensions)
 
     def get_encoding(self):
         encoding = {}
-        encoding['unlimited_dims'] = set(
-            [k for k in self.ds.dimensions
-             if self.ds.dimensions[k].isunlimited()])
+        encoding['unlimited_dims'] = {
+            k for k in self.ds.dimensions
+            if self.ds.dimensions[k].isunlimited()
+        }
         return encoding
 
     def close(self):
diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py
index f8033551f96..9c3946f657d 100644
--- a/xarray/backends/pynio_.py
+++ b/xarray/backends/pynio_.py
@@ -75,10 +75,12 @@ def get_dimensions(self):
         return Frozen(self.ds.dimensions)
 
     def get_encoding(self):
-        encoding = {}
-        encoding['unlimited_dims'] = set(
-            [k for k in self.ds.dimensions if self.ds.unlimited(k)])
-        return encoding
+        return {
+            'unlimited_dims': {
+                k for k in self.ds.dimensions
+                if self.ds.unlimited(k)
+            }
+        }
 
     def close(self):
         self._manager.close()
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index c0634fff009..effacd8b4b7 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -166,8 +166,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key):
 
 def _extract_zarr_variable_encoding(variable, raise_on_invalid=False):
     encoding = variable.encoding.copy()
-    valid_encodings = set(['chunks', 'compressor', 'filters',
-                           'cache_metadata'])
+    valid_encodings = {'chunks', 'compressor', 'filters', 'cache_metadata'}
 
     if raise_on_invalid:
         invalid = [k for k in encoding if k not in valid_encodings]
@@ -340,8 +339,10 @@ def store(self, variables, attributes, check_encoding_set=frozenset(),
             only needed in append mode
         """
 
-        existing_variables = set([vn for vn in variables
-                                  if _encode_variable_name(vn) in self.ds])
+        existing_variables = {
+            vn for vn in variables
+            if _encode_variable_name(vn) in self.ds
+        }
         new_variables = set(variables) - existing_variables
         variables_without_encoding = OrderedDict([(vn, variables[vn])
                                                   for vn in new_variables])
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 400cfe11d33..7187f1266bd 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -637,7 +637,7 @@ def __apply__(self, other):
 
 
 _FREQUENCY_CONDITION = '|'.join(_FREQUENCIES.keys())
-_PATTERN = r'^((?P<multiple>\d+)|())(?P<freq>({0}))$'.format(
+_PATTERN = r'^((?P<multiple>\d+)|())(?P<freq>({}))$'.format(
     _FREQUENCY_CONDITION)
 
 
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index ea18b402ad2..4930a77d022 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -23,7 +23,7 @@
 
 
 # standard calendars recognized by cftime
-_STANDARD_CALENDARS = set(['standard', 'gregorian', 'proleptic_gregorian'])
+_STANDARD_CALENDARS = {'standard', 'gregorian', 'proleptic_gregorian'}
 
 _NS_PER_TIME_DELTA = {'us': int(1e3),
                       'ms': int(1e6),
diff --git a/xarray/conventions.py b/xarray/conventions.py
index d0d90242426..616e557efcd 100644
--- a/xarray/conventions.py
+++ b/xarray/conventions.py
@@ -186,7 +186,7 @@ def ensure_dtype_not_object(var, name=None):
         if strings.is_bytes_dtype(inferred_dtype):
             fill_value = b''
         elif strings.is_unicode_dtype(inferred_dtype):
-            fill_value = u''
+            fill_value = ''
         else:
             # insist on using float for numeric values
             if not np.issubdtype(inferred_dtype, np.floating):
diff --git a/xarray/convert.py b/xarray/convert.py
index b8c0c2a7eca..83055631bb5 100644
--- 
a/xarray/convert.py +++ b/xarray/convert.py @@ -30,7 +30,7 @@ def encode(var): def _filter_attrs(attrs, ignored_attrs): """ Return attrs that are not in ignored_attrs """ - return dict((k, v) for k, v in attrs.items() if k not in ignored_attrs) + return {k: v for k, v in attrs.items() if k not in ignored_attrs} def from_cdms2(variable): @@ -119,7 +119,7 @@ def set_cdms2_attrs(var, attrs): def _pick_attrs(attrs, keys): """ Return attrs with keys in keys list """ - return dict((k, v) for k, v in attrs.items() if k in keys) + return {k: v for k, v in attrs.items() if k in keys} def _get_iris_args(attrs): @@ -188,7 +188,7 @@ def _iris_obj_to_attrs(obj): if obj.units.origin != '1' and not obj.units.is_unknown(): attrs['units'] = obj.units.origin attrs.update(obj.attributes) - return dict((k, v) for k, v in attrs.items() if v is not None) + return {k: v for k, v in attrs.items() if v is not None} def _iris_cell_methods_to_str(cell_methods_obj): diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 711634a95ca..1db9157850a 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -341,10 +341,10 @@ def reindex_variables( for dim, indexer in indexers.items(): if isinstance(indexer, DataArray) and indexer.dims != (dim,): warnings.warn( - "Indexer has dimensions {0:s} that are different " - "from that to be indexed along {1:s}. " - "This will behave differently in the future.".format( - str(indexer.dims), dim), + "Indexer has dimensions {:s} that are different " + "from that to be indexed along {:s}. " + "This will behave differently in the future." + .format(str(indexer.dims), dim), FutureWarning, stacklevel=3) target = new_indexes[dim] = utils.safe_cast_to_index(indexers[dim]) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 5718698f852..3764f758682 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -41,9 +41,8 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): if isinstance(entry, list): for i, item in enumerate(entry): - for result in _infer_tile_ids_from_nested_list(item, - current_pos + (i,)): - yield result + yield from _infer_tile_ids_from_nested_list( + item, current_pos + (i,)) else: yield current_pos, entry @@ -699,10 +698,12 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different', concat_dims = set(ds0.dims) if ds0.dims != ds1.dims: dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items()) - concat_dims = set(i for i, _ in dim_tuples) + concat_dims = {i for i, _ in dim_tuples} if len(concat_dims) > 1: - concat_dims = set(d for d in concat_dims - if not ds0[d].equals(ds1[d])) + concat_dims = { + d for d in concat_dims + if not ds0[d].equals(ds1[d]) + } if len(concat_dims) > 1: raise ValueError('too many different dimensions to ' 'concatenate: %s' % concat_dims) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 19d595079e5..70d11fe18ca 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1389,7 +1389,7 @@ def expand_dims(self, dim: Union[None, Hashable, Sequence[Hashable], elif isinstance(dim, Sequence) and not isinstance(dim, str): if len(dim) != len(set(dim)): raise ValueError('dims should not contain duplicate values.') - dim = OrderedDict(((d, 1) for d in dim)) + dim = OrderedDict((d, 1) for d in dim) elif dim is not None and not isinstance(dim, Mapping): dim = OrderedDict(((cast(Hashable, dim), 1),)) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 00c813ece09..3ddffec8e5e 100644 --- a/xarray/core/formatting.py +++ 
b/xarray/core/formatting.py
@@ -145,7 +145,7 @@ def format_item(x, timedelta_format=None, quote_strings=True):
     elif isinstance(x, (str, bytes)):
         return repr(x) if quote_strings else x
     elif isinstance(x, (float, np.float)):
-        return '{0:.4}'.format(x)
+        return '{:.4}'.format(x)
     else:
         return str(x)
 
@@ -399,7 +399,7 @@ def short_data_repr(array):
     elif array._in_memory or array.size < 1e5:
         return short_array_repr(array.data)
     else:
-        return u'[{} values with dtype={}]'.format(array.size, array.dtype)
+        return '[{} values with dtype={}]'.format(array.size, array.dtype)
 
 
 def array_repr(arr):
@@ -409,10 +409,12 @@ def array_repr(arr):
     else:
         name_str = ''
 
-    summary = ['<xarray.{} {}({})>'.format(
-        type(arr).__name__, name_str, dim_summary(arr))]
-
-    summary.append(short_data_repr(arr))
+    summary = [
+        '<xarray.{} {}({})>'.format(
+            type(arr).__name__, name_str, dim_summary(arr)
+        ),
+        short_data_repr(arr)
+    ]
 
     if hasattr(arr, 'coords'):
         if arr.coords:
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 0649ecab44f..2be0857a4d3 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -46,20 +46,30 @@ def _dummy_copy(xarray_obj):
     from .dataset import Dataset
     from .dataarray import DataArray
     if isinstance(xarray_obj, Dataset):
-        res = Dataset(dict((k, dtypes.get_fill_value(v.dtype))
-                           for k, v in xarray_obj.data_vars.items()),
-                      dict((k, dtypes.get_fill_value(v.dtype))
-                           for k, v in xarray_obj.coords.items()
-                           if k not in xarray_obj.dims),
-                      xarray_obj.attrs)
+        res = Dataset(
+            {
+                k: dtypes.get_fill_value(v.dtype)
+                for k, v in xarray_obj.data_vars.items()
+            },
+            {
+                k: dtypes.get_fill_value(v.dtype)
+                for k, v in xarray_obj.coords.items()
+                if k not in xarray_obj.dims
+            },
+            xarray_obj.attrs
+        )
     elif isinstance(xarray_obj, DataArray):
-        res = DataArray(dtypes.get_fill_value(xarray_obj.dtype),
-                        dict((k, dtypes.get_fill_value(v.dtype))
-                             for k, v in xarray_obj.coords.items()
-                             if k not in xarray_obj.dims),
-                        dims=[],
-                        name=xarray_obj.name,
-                        attrs=xarray_obj.attrs)
+        res = DataArray(
+            dtypes.get_fill_value(xarray_obj.dtype),
+            {
+                k: dtypes.get_fill_value(v.dtype)
+                for k, v in xarray_obj.coords.items()
+                if k not in xarray_obj.dims
+            },
+            dims=[],
+            name=xarray_obj.name,
+            attrs=xarray_obj.attrs
+        )
     else:  # pragma: no cover
         raise AssertionError
     return res
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index aea5a5a3f4f..a9ad55e2652 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -149,7 +149,7 @@ def convert_label_indexer(index, label, index_name='', method=None,
             raise ValueError('cannot use a dict-like object for selection on '
                              'a dimension that does not have a MultiIndex')
         elif len(label) == index.nlevels and not is_nested_vals:
-            indexer = index.get_loc(tuple((label[k] for k in index.names)))
+            indexer = index.get_loc(tuple(label[k] for k in index.names))
         else:
             for k, v in label.items():
                 # index should be an item (i.e. 
Hashable) not an array-like diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 3759a7c5634..0c0fc1e50a8 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -258,8 +258,10 @@ def get_op(name): return getattr(operator, op_str(name)) -NON_INPLACE_OP = dict((get_op('i' + name), get_op(name)) - for name in NUM_BINARY_OPS) +NON_INPLACE_OP = { + get_op('i' + name): get_op(name) + for name in NUM_BINARY_OPS +} def inplace_to_noninplace_op(f): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 3c9d85f13d7..85f26d85cd4 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -537,9 +537,10 @@ def _validate_indexers(self, key): if k.dtype.kind == 'b': if self.shape[self.get_axis_num(dim)] != len(k): raise IndexError( - "Boolean array size {0:d} is used to index array " - "with shape {1:s}.".format(len(k), - str(self.shape))) + "Boolean array size {:d} is used to index array " + "with shape {:s}." + .format(len(k), str(self.shape)) + ) if k.ndim > 1: raise IndexError("{}-dimensional boolean indexing is " "not supported. ".format(k.ndim)) @@ -547,8 +548,9 @@ def _validate_indexers(self, key): raise IndexError( "Boolean indexer should be unlabeled or on the " "same dimension to the indexed array. Indexer is " - "on {0:s} but the target dimension is " - "{1:s}.".format(str(k.dims), dim)) + "on {:s} but the target dimension is {:s}." + .format(str(k.dims), dim) + ) def _broadcast_indexes_outer(self, key): dims = tuple(k.dims[0] if isinstance(k, Variable) else dim @@ -888,8 +890,10 @@ def chunk(self, chunks=None, name=None, lock=False): import dask.array as da if utils.is_dict_like(chunks): - chunks = dict((self.get_axis_num(dim), chunk) - for dim, chunk in chunks.items()) + chunks = { + self.get_axis_num(dim): chunk + for dim, chunk in chunks.items() + } if chunks is None: chunks = self.chunks or self.shape diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index d0003b702df..26102a044e3 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -20,7 +20,7 @@ def _infer_line_data(darray, x, y, hue): - error_msg = ('must be either None or one of ({0:s})' + error_msg = ('must be either None or one of ({:s})' .format(', '.join([repr(dd) for dd in darray.dims]))) ndims = len(darray.dims) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 026ae6a55ff..ae5a77e0f32 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1077,11 +1077,11 @@ def test_open_encodings(self): with open_dataset(tmp_file) as actual: assert_equal(actual['time'], expected['time']) - actual_encoding = dict((k, v) for k, v in - actual['time'].encoding.items() - if k in expected['time'].encoding) - assert actual_encoding == \ - expected['time'].encoding + actual_encoding = { + k: v for k, v in actual['time'].encoding.items() + if k in expected['time'].encoding + } + assert actual_encoding == expected['time'].encoding def test_dump_encodings(self): # regression test for #709 @@ -2857,11 +2857,15 @@ def test_deterministic_names(self): data = create_test_data() data.to_netcdf(tmp) with open_mfdataset(tmp, combine='by_coords') as ds: - original_names = dict((k, v.data.name) - for k, v in ds.data_vars.items()) + original_names = { + k: v.data.name + for k, v in ds.data_vars.items() + } with open_mfdataset(tmp, combine='by_coords') as ds: - repeat_names = dict((k, v.data.name) - for k, v in ds.data_vars.items()) + repeat_names = { + k: v.data.name + for k, v in ds.data_vars.items() + } for var_name, dask_name in original_names.items(): 
assert var_name in dask_name assert dask_name[:13] == 'open_dataset-' diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 98824c9136c..13c0983212e 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -35,7 +35,7 @@ def test_vlen_dtype(): def test_EncodedStringCoder_decode(): coder = strings.EncodedStringCoder() - raw_data = np.array([b'abc', 'ß∂µ∆'.encode('utf-8')]) + raw_data = np.array([b'abc', 'ß∂µ∆'.encode()]) raw = Variable(('x',), raw_data, {'_Encoding': 'utf-8'}) actual = coder.decode(raw) @@ -50,7 +50,7 @@ def test_EncodedStringCoder_decode(): def test_EncodedStringCoder_decode_dask(): coder = strings.EncodedStringCoder() - raw_data = np.array([b'abc', 'ß∂µ∆'.encode('utf-8')]) + raw_data = np.array([b'abc', 'ß∂µ∆'.encode()]) raw = Variable(('x',), raw_data, {'_Encoding': 'utf-8'}).chunk() actual = coder.decode(raw) assert isinstance(actual.data, da.Array) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index dacf68f6be8..82afeab7aba 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -451,7 +451,7 @@ def test_decode_360_day_calendar(): calendar = '360_day' # ensure leap year doesn't matter for year in [2010, 2011, 2012, 2013, 2014]: - units = 'days since {0}-01-01'.format(year) + units = 'days since {}-01-01'.format(year) num_times = np.arange(100) if cftime.__name__ == 'cftime': diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 6218f752bb7..1a7d3de5a1d 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -29,9 +29,14 @@ def test_concat(self): def rectify_dim_order(dataset): # return a new dataset with all variable dimensions transposed into # the order in which they are found in `data` - return Dataset(dict((k, v.transpose(*data[k].dims)) - for k, v in dataset.data_vars.items()), - dataset.coords, attrs=dataset.attrs) + return Dataset( + { + k: v.transpose(*data[k].dims) + for k, v in dataset.data_vars.items() + }, + dataset.coords, + attrs=dataset.attrs + ) for dim in ['dim1', 'dim2']: datasets = [g for _, g in data.groupby(dim, squeeze=False)] diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index b9690c211f4..e7cb8006b08 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -115,9 +115,9 @@ def test_multidimensional_coordinates(self): foo1_coords = enc['foo1'].attrs.get('coordinates', '') foo2_coords = enc['foo2'].attrs.get('coordinates', '') foo3_coords = enc['foo3'].attrs.get('coordinates', '') - assert set(foo1_coords.split()) == set(['lat1', 'lon1']) - assert set(foo2_coords.split()) == set(['lat2', 'lon2']) - assert set(foo3_coords.split()) == set(['lat3', 'lon3']) + assert set(foo1_coords.split()) == {'lat1', 'lon1'} + assert set(foo2_coords.split()) == {'lat2', 'lon2'} + assert set(foo3_coords.split()) == {'lat3', 'lon3'} # Should not have any global coordinates. 
        assert 'coordinates' not in attrs
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 000469f24bf..3a19c229fe6 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -1685,7 +1685,7 @@ def test_math_with_coords(self):
         assert_identical(expected, actual)
 
         actual = orig + orig[0, 0]
-        exp_coords = dict((k, v) for k, v in coords.items() if k != 'lat')
+        exp_coords = {k: v for k, v in coords.items() if k != 'lat'}
         expected = DataArray(orig.values + orig.values[0, 0],
                              exp_coords, dims=['x', 'y'])
         assert_identical(expected, actual)
@@ -3377,7 +3377,7 @@ def test__title_for_slice(self):
         assert '' == a2._title_for_slice()
 
     def test__title_for_slice_truncate(self):
-        array = DataArray(np.ones((4)))
+        array = DataArray(np.ones(4))
         array.coords['a'] = 'a' * 100
         array.coords['b'] = 'b' * 100
 
@@ -3773,7 +3773,7 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods):
     # Test all bottleneck functions
     rolling_obj = da.rolling(time=7, min_periods=min_periods)
 
-    func_name = 'move_{0}'.format(name)
+    func_name = 'move_{}'.format(name)
     actual = getattr(rolling_obj, name)()
     expected = getattr(bn, func_name)(da.values, window=7, axis=1,
                                       min_count=min_periods)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index fc15393f269..78891045bae 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -136,8 +136,10 @@ def lazy_inaccessible(k, v):
             data = indexing.LazilyOuterIndexedArray(
                 InaccessibleArray(v.values))
             return Variable(v.dims, data, v.attrs)
-        return dict((k, lazy_inaccessible(k, v)) for
-                    k, v in self._variables.items())
+        return {
+            k: lazy_inaccessible(k, v)
+            for k, v in self._variables.items()
+        }
 
 
 class TestDataset:
@@ -239,7 +241,7 @@ def test_unicode_data(self):
         repr(data)  # should not raise
 
         byteorder = '<' if sys.byteorder == 'little' else '>'
-        expected = dedent(u"""\
+        expected = dedent("""\
         <xarray.Dataset>
         Dimensions:  (foø: 1)
         Coordinates:
@@ -520,7 +522,7 @@ def test_attr_access(self):
         assert ds.title == ds.attrs['title']
         assert ds.tmin.units == ds['tmin'].attrs['units']
 
-        assert set(['tmin', 'title']) <= set(dir(ds))
+        assert {'tmin', 'title'} <= set(dir(ds))
         assert 'units' in set(dir(ds.tmin))
 
         # should defer to variable of same name
@@ -1953,8 +1955,9 @@ def test_drop_variables(self):
 
         assert_identical(data, data.drop([]))
 
-        expected = Dataset(dict((k, data[k]) for k in data.variables
-                                if k != 'time'))
+        expected = Dataset(
+            {k: data[k] for k in data.variables if k != 'time'}
+        )
         actual = data.drop('time')
         assert_identical(expected, actual)
         actual = data.drop(['time'])
@@ -2951,9 +2954,9 @@ def test_delitem(self):
         all_items = set(data.variables)
         assert set(data.variables) == all_items
         del data['var1']
-        assert set(data.variables) == all_items - set(['var1'])
+        assert set(data.variables) == all_items - {'var1'}
         del data['numbers']
-        assert set(data.variables) == all_items - set(['var1', 'numbers'])
+        assert set(data.variables) == all_items - {'var1', 'numbers'}
         assert 'numbers' not in data.coords
 
         expected = Dataset()
@@ -2966,8 +2969,12 @@ def test_squeeze(self):
         for args in [[], [['x']], [['x', 'z']]]:
             def get_args(v):
                 return [set(args[0]) & set(v.dims)] if args else []
-            expected = Dataset(dict((k, v.squeeze(*get_args(v)))
-                                    for k, v in data.variables.items()))
+            expected = Dataset(
+                {
+                    k: v.squeeze(*get_args(v))
+                    for k, v in data.variables.items()
+                }
+            )
             expected = expected.set_coords(data.coords)
             assert_identical(expected, data.squeeze(*args))
         # invalid squeeze
@@ -3869,8 +3876,9 @@ 
def test_reduce(self): assert len(data.mean().coords) == 0 actual = data.max() - expected = Dataset(dict((k, v.max()) - for k, v in data.data_vars.items())) + expected = Dataset( + {k: v.max() for k, v in data.data_vars.items()} + ) assert_equal(expected, actual) assert_equal(data.min(dim=['dim1']), @@ -4981,7 +4989,7 @@ def test_rolling_wrapped_bottleneck(ds, name, center, min_periods, key): # Test all bottleneck functions rolling_obj = ds.rolling(time=7, min_periods=min_periods) - func_name = 'move_{0}'.format(name) + func_name = 'move_{}'.format(name) actual = getattr(rolling_obj, name)() if key == 'z1': # z1 does not depend on 'Time' axis. Stored as it is. expected = ds[key] diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 2a13c131bf3..d6a580048c7 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -45,7 +45,7 @@ def substring_in_axes(substring, ax): ''' Return True if a substring is found anywhere in an axes ''' - alltxt = set([t.get_text() for t in ax.findobj(mpl.text.Text)]) + alltxt = {t.get_text() for t in ax.findobj(mpl.text.Text)} for txt in alltxt: if substring in txt: return True @@ -1158,9 +1158,9 @@ def test_facetgrid_cmap(self): d = DataArray(data, dims=['x', 'y', 'time']) fg = d.plot.pcolormesh(col='time') # check that all color limits are the same - assert len(set(m.get_clim() for m in fg._mappables)) == 1 + assert len({m.get_clim() for m in fg._mappables}) == 1 # check that all colormaps are the same - assert len(set(m.get_cmap().name for m in fg._mappables)) == 1 + assert len({m.get_cmap().name for m in fg._mappables}) == 1 def test_facetgrid_cbar_kwargs(self): a = easy_array((10, 15, 2, 3)) @@ -1498,7 +1498,7 @@ def test_names_appear_somewhere(self): self.darray.name = 'testvar' self.g.map_dataarray(xplt.contourf, 'x', 'y') for k, ax in zip('abc', self.g.axes.flat): - assert 'z = {0}'.format(k) == ax.get_title() + assert 'z = {}'.format(k) == ax.get_title() alltxt = text_in_fig() assert self.darray.name in alltxt diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 01d4f181d7f..0d9009f439d 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -77,7 +77,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, msg = """ MD5 checksum does not match, try downloading dataset again. 
""" - raise IOError(msg) + raise OSError(msg) ds = _open_dataset(localfile, **kws) From d0ad3a54e041e301f34a73276738e6d029a6ed35 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 7 Aug 2019 10:27:11 +0100 Subject: [PATCH 2/5] pyupgrade --- xarray/core/dataset.py | 31 ++++++++++++++++--------------- xarray/tests/test_sparse.py | 8 ++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5d3ca932ccc..7188b5e67c2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -113,7 +113,7 @@ def calculate_dimensions( """ dims = {} # type: Dict[Any, int] last_used = {} - scalar_vars = set(k for k, v in variables.items() if not v.dims) + scalar_vars = {k for k, v in variables.items() if not v.dims} for k, var in variables.items(): for dim, size in zip(var.dims, var.shape): if dim in scalar_vars: @@ -997,7 +997,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> 'Dataset': for v in variables.values(): needed_dims.update(v.dims) - dims = dict((k, self.dims[k]) for k in needed_dims) + dims = {k: self.dims[k] for k in needed_dims} for k in self._coord_names: if set(self.variables[k].dims) <= needed_dims: @@ -1569,7 +1569,7 @@ def chunk( def selkeys(dict_, keys): if dict_ is None: return None - return dict((d, dict_[d]) for d in keys if d in dict_) + return {d: dict_[d] for d in keys if d in dict_} def maybe_chunk(name, var, chunks): chunks = selkeys(chunks, var.dims) @@ -1923,7 +1923,7 @@ def relevant_keys(mapping): raise ValueError('Indexers must be 1 dimensional') # all the indexers should have the same length - lengths = set(len(v) for k, v in indexers) + lengths = {len(v) for k, v in indexers} if len(lengths) > 1: raise ValueError('All indexers must be the same length') @@ -2577,7 +2577,7 @@ def swap_dims( 'variable along the old dimension %r' % (v, k)) - result_dims = set(dims_dict.get(dim, dim) for dim in self.dims) + result_dims = {dims_dict.get(dim, dim) for dim in self.dims} coord_names = self._coord_names.copy() coord_names.update(dims_dict.values()) @@ -2674,7 +2674,7 @@ def expand_dims( elif isinstance(dim, Sequence): if len(dim) != len(set(dim)): raise ValueError('dims should not contain duplicate values.') - dim = OrderedDict(((d, 1) for d in dim)) + dim = OrderedDict((d, 1) for d in dim) dim = either_dict_or_kwargs(dim, dim_kwargs, 'expand_dims') assert isinstance(dim, MutableMapping) @@ -2905,7 +2905,7 @@ def _stack_once(self, dims, new_dim): idx = utils.multiindex_from_product_levels(levels, names=dims) variables[new_dim] = IndexVariable(new_dim, idx) - coord_names = set(self._coord_names) - set(dims) | set([new_dim]) + coord_names = set(self._coord_names) - set(dims) | {new_dim} indexes = OrderedDict((k, v) for k, v in self.indexes.items() if k not in dims) @@ -3103,7 +3103,7 @@ def _unstack_once(self, dim: Hashable) -> 'Dataset': variables[name] = IndexVariable(name, lev) indexes[name] = lev - coord_names = set(self._coord_names) - set([dim]) | set(new_dim_names) + coord_names = set(self._coord_names) - {dim} | set(new_dim_names) return self._replace_with_new_dims( variables, coord_names=coord_names, indexes=indexes) @@ -3348,7 +3348,7 @@ def _drop_vars( variables = OrderedDict((k, v) for k, v in self._variables.items() if k not in names) - coord_names = set(k for k in self._coord_names if k in variables) + coord_names = {k for k in self._coord_names if k in variables} indexes = OrderedDict((k, v) for k, v in self.indexes.items() if k not in names) return self._replace_with_new_dims( @@ -3740,7 
+3740,7 @@ def reduce( allow_lazy=allow_lazy, **kwargs) - coord_names = set(k for k in self.coords if k in variables) + coord_names = {k for k in self.coords if k in variables} indexes = OrderedDict((k, v) for k, v in self.indexes.items() if k in variables) attrs = self.attrs if keep_attrs else None @@ -4079,7 +4079,7 @@ def from_dict(cls, d): DataArray.from_dict """ - if not set(['coords', 'data_vars']).issubset(set(d)): + if not {'coords', 'data_vars'}.issubset(set(d)): variables = d.items() else: import itertools @@ -4250,8 +4250,9 @@ def diff(self, dim, n=1, label='upper'): if n == 0: return self if n < 0: - raise ValueError('order `n` must be non-negative but got {0}' - ''.format(n)) + raise ValueError( + 'order `n` must be non-negative but got {}'.format(n) + ) # prepare slices kwargs_start = {dim: slice(None, -1)} @@ -4530,7 +4531,7 @@ def quantile(self, q, dim=None, interpolation='linear', """ if isinstance(dim, str): - dims = set([dim]) + dims = {dim} elif dim is None: dims = set(self.dims) else: @@ -4561,7 +4562,7 @@ def quantile(self, q, dim=None, interpolation='linear', variables[name] = var # construct the new dataset - coord_names = set(k for k in self.coords if k in variables) + coord_names = {k for k in self.coords if k in variables} indexes = OrderedDict( (k, v) for k, v in self.indexes.items() if k in variables ) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 3aa407f72bc..329952bc064 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -535,7 +535,7 @@ def test_ufuncs(self): def test_dataarray_repr(self): a = xr.DataArray( - COO.from_numpy(np.ones((4))), + COO.from_numpy(np.ones(4)), dims=['x'], coords={'y': ('x', COO.from_numpy(np.arange(4)))}) expected = dedent("""\ @@ -548,7 +548,7 @@ def test_dataarray_repr(self): def test_dataset_repr(self): ds = xr.Dataset( - data_vars={'a': ('x', COO.from_numpy(np.ones((4))))}, + data_vars={'a': ('x', COO.from_numpy(np.ones(4)))}, coords={'y': ('x', COO.from_numpy(np.arange(4)))}) expected = dedent("""\ @@ -562,7 +562,7 @@ def test_dataset_repr(self): def test_dataarray_pickle(self): a1 = xr.DataArray( - COO.from_numpy(np.ones((4))), + COO.from_numpy(np.ones(4)), dims=['x'], coords={'y': ('x', COO.from_numpy(np.arange(4)))}) a2 = pickle.loads(pickle.dumps(a1)) @@ -570,7 +570,7 @@ def test_dataarray_pickle(self): def test_dataset_pickle(self): ds1 = xr.Dataset( - data_vars={'a': ('x', COO.from_numpy(np.ones((4))))}, + data_vars={'a': ('x', COO.from_numpy(np.ones(4)))}, coords={'y': ('x', COO.from_numpy(np.arange(4)))}) ds2 = pickle.loads(pickle.dumps(ds1)) assert_identical(ds1, ds2) From 7b490f495ca730c4a9a57ef80291a982a3e87f15 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 7 Aug 2019 10:29:47 +0100 Subject: [PATCH 3/5] Tweaks to Dataset.drop_dims() --- xarray/core/dataset.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7188b5e67c2..3d2ef53a034 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3364,7 +3364,7 @@ def drop_dims( Parameters ---------- - drop_dims : str or list + drop_dims : hashable or iterable of hashable Dimension or dimensions to drop. 
errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if any of the @@ -3386,18 +3386,20 @@ def drop_dims( raise ValueError('errors must be either "raise" or "ignore"') if isinstance(drop_dims, str) or not isinstance(drop_dims, Iterable): - drop_dims = [drop_dims] + drop_dims = {drop_dims} else: - drop_dims = list(drop_dims) + drop_dims = set(drop_dims) if errors == 'raise': - missing_dimensions = [d for d in drop_dims if d not in self.dims] - if missing_dimensions: + missing_dims = drop_dims - set(self.dims) + if missing_dims: raise ValueError('Dataset does not contain the dimensions: %s' - % missing_dimensions) + % missing_dims) - drop_vars = set(k for k, v in self._variables.items() - for d in v.dims if d in drop_dims) + drop_vars = { + k for k, v in self._variables.items() + if set(v.dims) & drop_dims + } return self._drop_vars(drop_vars) def transpose(self, *dims: Hashable) -> 'Dataset': From 590d48157e244647ea5bdaa6fdd4d5ad835f5387 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 7 Aug 2019 10:31:13 +0100 Subject: [PATCH 4/5] mypy --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 8b76dd27879..128550071cc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -83,6 +83,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-seaborn.*] ignore_missing_imports = True +[mypy-sparse.*] +ignore_missing_imports = True [mypy-toolz.*] ignore_missing_imports = True [mypy-zarr.*] From 58121fa5e584cccb85c4fe25bfc3f608a5c9c55d Mon Sep 17 00:00:00 2001 From: Guido Imperiale Date: Wed, 7 Aug 2019 14:50:17 +0100 Subject: [PATCH 5/5] More concise code --- xarray/backends/pseudonetcdf_.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 92f1a575d45..34a61ae8108 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -81,12 +81,12 @@ def get_dimensions(self): return Frozen(self.ds.dimensions) def get_encoding(self): - encoding = {} - encoding['unlimited_dims'] = { - k for k in self.ds.dimensions - if self.ds.dimensions[k].isunlimited() + return { + 'unlimited_dims': { + k for k in self.ds.dimensions + if self.ds.dimensions[k].isunlimited() + } } - return encoding def close(self): self._manager.close()