better bootstrapping #285

Merged: 34 commits from AS_fix_bootstrapping into master, Feb 4, 2020

Commits (34):
fd68215  bootstrap.stats parallelized properly (Dec 29, 2019)
9da77a2  bootstrap_compute properly parallelized (Dec 29, 2019)
21f4199  CL PR_template (Dec 29, 2019)
28bc13a  dask demo (Jan 1, 2020)
fd29223  dask notebook macbook timings (Jan 2, 2020)
9c74bea  dask nb timings mistral (Jan 2, 2020)
0690392  tried to fix my_quantile (Jan 2, 2020)
45db8e9  my_quantile q=list (Jan 4, 2020)
71859fe  comments (Jan 4, 2020)
b21bcfb  Merge branch 'master' into AS_fix_bootstrapping (aaronspring, Jan 5, 2020)
1daf22f  Merge branch 'master' into AS_fix_bootstrapping (bradyrx, Jan 12, 2020)
ad55f63  minor changes requested (Jan 14, 2020)
cd89817  no .load() in bootstrap (Jan 14, 2020)
840597f  Merge branch 'master' into AS_fix_bootstrapping (aaronspring, Jan 14, 2020)
b006b25  contrib asv added (Jan 14, 2020)
62521ec  new hindcast benchmarking (Jan 14, 2020)
e0cb8b6  rm commented out _load_mem (Jan 14, 2020)
96fb4eb  persist smp_hind speedup? (Jan 20, 2020)
95150b9  not persist, fixed benchmarks (Jan 24, 2020)
c11dd69  Merge branch 'master' into AS_fix_bootstrapping (aaronspring, Jan 24, 2020)
c25cca2  bf ds.get_axis_num (Jan 24, 2020)
d96172d  Merge branch 'master' into AS_fix_bootstrapping (aaronspring, Jan 30, 2020)
22fee69  PR template and contrib asv (Jan 30, 2020)
ec14d50  req changes (Jan 30, 2020)
0f87ff0  dask examples docs (Jan 30, 2020)
fcd493f  Update test_stats.py (aaronspring, Jan 30, 2020)
6bcc96a  formatting requested (Feb 3, 2020)
c790afa  alphabetical order (Feb 3, 2020)
2f2f5a1  require pandas 0.25.3 (Feb 3, 2020)
0fbb8b1  lint (Feb 3, 2020)
f295aaf  resolved comments (Feb 3, 2020)
5a27833  benchmarks prob dim=member (Feb 3, 2020)
ba85852  get_chunksize takes xr.ds (Feb 4, 2020)
abc8e3a  warning caption MB added (Feb 4, 2020)

Changes from 1 commit: 6bcc96ab66ac6faf25905dc81eb242bd83229d91, "formatting requested" (AS committed Feb 3, 2020)

8 changes: 8 additions & 0 deletions asv_bench/benchmarks/__init__.py
@@ -1,6 +1,7 @@
# https://github.com/pydata/xarray/blob/master/asv_bench/benchmarks/__init__.py
import itertools

import dask
import numpy as np

_counter = itertools.count()
@@ -47,3 +48,10 @@ def randint(low, high=None, size=None, frac_minus=None, seed=0):
    x.flat[inds] = -1

    return x


def ensure_loaded(res):
    """Compute no lazy results."""
    if dask.is_dask_collection(res):
        res = res.compute()
    return res
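
The `ensure_loaded` helper added above forces evaluation of lazy dask results so that the asv timing and peak-memory measurements cover the actual computation rather than just graph construction. A minimal sketch of its behaviour, assuming `asv_bench.benchmarks` is importable and using an illustrative dask-backed array that is not part of this diff:

import dask.array as da
import xarray as xr

from asv_bench.benchmarks import ensure_loaded  # helper defined in the __init__.py change above

# a small dask-backed DataArray stands in for a lazy benchmark result
lazy = xr.DataArray(da.random.random((100, 100), chunks=(50, 50)), dims=("x", "y"))
loaded = ensure_loaded(lazy)  # dask collection, so .compute() is triggered
eager = ensure_loaded(xr.DataArray([1.0, 2.0, 3.0]))  # non-dask input is returned unchanged
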
126 changes: 68 additions & 58 deletions asv_bench/benchmarks/benchmarks_hindcast.py
@@ -1,43 +1,33 @@
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.


import dask
import numpy as np
import xarray as xr

from climpred.bootstrap import bootstrap_hindcast
# from climpred.constants import HINDCAST_COMPARISONS
from climpred.prediction import compute_hindcast

from . import parameterized, randn, requires_dask
from . import ensure_loaded, parameterized, randn, requires_dask

HINDCAST_COMPARISONS = ['m2r'] # e2r and probabilistic dont match
# only take subselection of all possible metrics
METRICS = ['rmse', 'pearson_r', 'crpss']
# only take comparisons compatible with probabilistic metrics
HINDCAST_COMPARISONS = ['m2o']

bootstrap = 8


def _ensure_loaded(res):
"""Compute no lazy results."""
if dask.is_dask_collection(res):
res = res.compute()
return res
BOOTSTRAP = 8


class Generate:
"""
Generate random hind, control to be benckmarked.
Generate input data for benchmark.
"""

timeout = 600
repeat = (2, 5, 20)

def make_hind_ref(self):
"""hind and ref mimik hindcast experiment"""
def make_hind_obs(self):
"""Generates initialized hindcast, uninitialized historical and observational
data, mimicking a hindcast experiment."""
self.hind = xr.Dataset()
self.reference = xr.Dataset()
self.hist = xr.Dataset()
self.observations = xr.Dataset()
self.uninit = xr.Dataset()

self.nmember = 3
self.nlead = 3
@@ -47,26 +37,26 @@ def make_hind_ref(self):
self.init_end = 2000
self.ninit = self.init_end - self.init_start

frac_nan = 0.0
FRAC_NAN = 0.0

inits = np.arange(self.init_start, self.init_end)
leads = np.arange(1, 1 + self.nlead)
members = np.arange(1, 1 + self.nmember)

lons = xr.DataArray(
np.linspace(0, 360, self.nx),
np.linspace(0.5, 359.5, self.nx),
dims=('lon',),
attrs={'units': 'degrees east', 'long_name': 'longitude'},
)
lats = xr.DataArray(
np.linspace(-90, 90, self.ny),
np.linspace(-89.5, 89.5, self.ny),
dims=('lat',),
attrs={'units': 'degrees north', 'long_name': 'latitude'},
)
self.hind['tos'] = xr.DataArray(
self.hind['var'] = xr.DataArray(
randn(
(self.nmember, self.ninit, self.nlead, self.nx, self.ny),
frac_nan=frac_nan,
frac_nan=FRAC_NAN,
),
coords={
'member': members,
@@ -76,91 +66,111 @@ def make_hind_ref(self):
'lead': leads,
},
dims=('member', 'init', 'lead', 'lon', 'lat'),
name='tos',
name='var',
encoding=None,
attrs={'units': 'foo units', 'description': 'a description'},
attrs={'units': 'var units', 'description': 'a description'},
)
self.reference['tos'] = xr.DataArray(
randn((self.ninit, self.nx, self.ny), frac_nan=frac_nan),
self.observations['var'] = xr.DataArray(
randn((self.ninit, self.nx, self.ny), frac_nan=FRAC_NAN),
coords={'lon': lons, 'lat': lats, 'time': inits},
dims=('time', 'lon', 'lat'),
name='tos',
name='var',
encoding=None,
attrs={'units': 'foo units', 'description': 'a description'},
attrs={'units': 'var units', 'description': 'a description'},
)

self.hist['tos'] = xr.DataArray(
randn((self.ninit, self.nx, self.ny, self.nmember), frac_nan=frac_nan),
coords={'lon': lons, 'lat': lats, 'time': inits, 'member': members},
self.uninit['var'] = xr.DataArray(
randn(
(self.ninit, self.nx, self.ny, self.nmember), frac_nan=FRAC_NAN
),
coords={
'lon': lons,
'lat': lats,
'time': inits,
'member': members,
},
dims=('time', 'lon', 'lat', 'member'),
name='tos',
name='var',
encoding=None,
attrs={'units': 'foo units', 'description': 'a description'},
attrs={'units': 'var units', 'description': 'a description'},
)

self.hind.attrs = {'history': 'created for xarray benchmarking'}


class Compute(Generate):
"""
A few examples that benchmark climpred compute_hindcast.
Benchmark time and peak memory of `compute_hindcast` and `bootstrap_hindcast`.
"""

def setup(self, *args, **kwargs):
self.make_hind_ref()
self.make_hind_obs()

@parameterized(['metric', 'comparison'], (METRICS, HINDCAST_COMPARISONS))
def time_compute_hindcast(self, metric, comparison):
"""Take time for compute_hindcast."""
_ensure_loaded(
"""Take time for `compute_hindcast`."""
ensure_loaded(
compute_hindcast(
self.hind, self.reference, metric=metric, comparison=comparison
self.hind,
self.observations,
metric=metric,
comparison=comparison,
)
)

@parameterized(['metric', 'comparison'], (METRICS, HINDCAST_COMPARISONS))
def peakmem_compute_hindcast(self, metric, comparison):
"""Take memory peak for compute_hindcast for all comparisons."""
_ensure_loaded(
"""Take memory peak for `compute_hindcast`."""
ensure_loaded(
compute_hindcast(
self.hind, self.reference, metric=metric, comparison=comparison
self.hind,
self.observations,
metric=metric,
comparison=comparison,
)
)

@parameterized(['metric', 'comparison'], (METRICS, HINDCAST_COMPARISONS))
def time_bootstrap_hindcast(self, metric, comparison):
"""Take time for bootstrap_hindcast for one metric."""
_ensure_loaded(
"""Take time for `bootstrap_hindcast`."""
ensure_loaded(
bootstrap_hindcast(
self.hind,
self.reference,
self.hist,
self.uninit,
self.observations,
metric=metric,
comparison=comparison,
bootstrap=bootstrap,
bootstrap=BOOTSTRAP,
)
)

@parameterized(['metric', 'comparison'], (METRICS, HINDCAST_COMPARISONS))
def peakmem_bootstrap_hindcast(self, metric, comparison):
"""Take memory peak for bootstrap_hindcast."""
_ensure_loaded(
"""Take memory peak for `bootstrap_hindcast`."""
ensure_loaded(
bootstrap_hindcast(
self.hind,
self.reference,
self.hist,
self.uninit,
self.observations,
metric=metric,
comparison=comparison,
bootstrap=bootstrap,
bootstrap=BOOTSTRAP,
)
)


class ComputeDask(Compute):
def setup(self, *args, **kwargs):
"""Benchmark time and peak memory of `compute_hindcast` and
`bootstrap_hindcast`. This executes the same tests as `Compute` but on chunked
data."""
requires_dask()
# magic taken from
# https://github.com/pydata/xarray/blob/stable/asv_bench/benchmarks/rolling.py
super().setup(**kwargs)
# chunk along a spatial dimension to enable embarrassingly parallel computation
self.hind = self.hind['tos'].chunk({'lon': self.nx // bootstrap})
self.reference = self.reference['tos'].chunk({'lon': self.nx // bootstrap})
self.hist = self.hist['tos'].chunk({'lon': self.nx // bootstrap})
self.hind = self.hind['var'].chunk({'lon': self.nx // BOOTSTRAP})
self.observations = self.observations['var'].chunk(
{'lon': self.nx // BOOTSTRAP}
)
self.uninit = self.uninit['var'].chunk({'lon': self.nx // BOOTSTRAP})
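
The `ComputeDask` class reruns the same benchmarks on chunked inputs; splitting along the spatial `lon` dimension produces independent blocks that dask can evaluate in parallel. A standalone sketch of this chunking pattern (the array sizes and the toy computation are illustrative, not taken from the benchmarks; dask must be installed):

import numpy as np
import xarray as xr

nx, ny, nblocks = 64, 64, 8
data = xr.DataArray(
    np.random.rand(12, nx, ny),
    dims=("time", "lon", "lat"),
    name="var",
)
# chunking only 'lon' yields nblocks independent spatial pieces,
# which dask evaluates in an embarrassingly parallel fashion
chunked = data.chunk({"lon": nx // nblocks})
print(chunked.chunks)  # ((12,), (8, 8, 8, 8, 8, 8, 8, 8), (64,))
result = (chunked ** 2).mean("time").compute()  # each lon block is computed independently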