This repository has been archived by the owner on Aug 29, 2023. It is now read-only.

Merge pull request #430 from CCI-Tools/xx-jg-more-monitors
Add more monitors in ops
JanisGailis authored Sep 27, 2017
2 parents 1830f5d + 3b01860 commit c1155d6
Showing 6 changed files with 156 additions and 98 deletions.
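
Note on the pattern this commit threads through the ops: each op gains an optional monitor argument defaulting to Monitor.NONE, single-step work is wrapped in monitor.observing(...), and multi-step work uses monitor.starting(..., total_work=N) with one child monitor per step. The sketch below is a minimal illustration distilled only from the calls visible in the diffs; the op names some_op and some_multi_step_op are invented for the example and are not part of cate.

from xarray import Dataset

from cate.util.monitor import Monitor


def some_op(ds: Dataset, monitor: Monitor = Monitor.NONE) -> Dataset:
    # Single-step work: wrap the whole computation in one observing() context,
    # so the monitor reports it as one indivisible task.
    with monitor.observing("Doing the work"):
        result = ds.mean(keep_attrs=True, skipna=True)
    return result


def some_multi_step_op(ds: Dataset, monitor: Monitor = Monitor.NONE) -> Dataset:
    steps = ['step 1', 'step 2', 'step 3']
    # Multi-step work: declare the total amount of work up front, then hand
    # each step a child monitor that contributes one unit when its context exits.
    with monitor.starting('Calculate result', total_work=len(steps)):
        for step in steps:
            with monitor.child(1).observing(step):
                ds = ds + 1.0
    return ds

With the default Monitor.NONE these calls are no-ops, so existing callers that do not pass a monitor are unaffected.
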
13 changes: 9 additions & 4 deletions cate/ops/anomaly.py
@@ -63,8 +63,9 @@ def anomaly_external(ds: xr.Dataset,
     :param ds: The dataset to calculate anomalies from
     :param file: Path to reference data file
-    :param str: Apply the given transformation before calculating the anomaly.
+    :param transform: Apply the given transformation before calculating the anomaly.
         For supported operations see help on 'ds_arithmetics' operation.
+    :param monitor: a progress monitor.
     :return: The anomaly dataset
     """
     # Check if the time coordinate is of dtype datetime
@@ -126,7 +127,8 @@ def _group_anomaly(group: xr.Dataset,
 @op_return(add_history=True)
 def anomaly_internal(ds: xr.Dataset,
                      time_range: TimeRangeLike.TYPE = None,
-                     region: PolygonLike.TYPE = None) -> xr.Dataset:
+                     region: PolygonLike.TYPE = None,
+                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
     """
     Calculate anomaly using as reference data the mean of an optional region
     and time slice from the given dataset. If no time slice/spatial region is
@@ -137,6 +139,7 @@ def anomaly_internal(ds: xr.Dataset,
     :param ds: The dataset to calculate anomalies from
     :param time_range: Time range to use for reference data
     :param region: Spatial region to use for reference data
+    :param monitor: a progress monitor.
     :return: The anomaly dataset
     """
     ref = ds.copy()
@@ -146,5 +149,7 @@ def anomaly_internal(ds: xr.Dataset,
     if region:
         region = PolygonLike.convert(region)
         ref = subset_spatial(ref, region)
-    ref = ref.mean(keep_attrs=True, skipna=True)
-    return ds - ref
+    with monitor.observing("Calculating anomaly"):
+        ref = ref.mean(keep_attrs=True, skipna=True)
+        diff = ds - ref
+    return diff
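
As a usage illustration (not part of the commit): with neither time_range nor region given, the reference in anomaly_internal is simply the mean over the whole dataset, so the call below returns each variable's departure from its long-term mean. The dataset, the variable name 'tas', and the values are invented; a real monitor would normally be supplied by Cate's CLI or GUI rather than the Monitor.NONE default.

import numpy as np
import pandas as pd
import xarray as xr

from cate.ops.anomaly import anomaly_internal

# A small dummy lon/lat/time dataset of near-surface air temperature.
time = pd.date_range('2000-01-01', periods=24, freq='MS')
ds = xr.Dataset({'tas': (['time', 'lat', 'lon'], 280.0 + np.random.rand(24, 3, 4))},
                coords={'time': time,
                        'lat': [-10.0, 0.0, 10.0],
                        'lon': [0.0, 10.0, 20.0, 30.0]})

# Reference = mean of the whole dataset; anomaly = ds - reference.
# Passing time_range and/or region would restrict the reference instead.
anomalies = anomaly_internal(ds)
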
63 changes: 37 additions & 26 deletions cate/ops/arithmetics.py
@@ -34,13 +34,15 @@

 from cate.core.op import op, op_input, op_return
 from cate.core.types import DatasetLike
+from cate.util.monitor import Monitor


 @op(tags=['arithmetic'], version='1.0')
 @op_input('ds', data_type=DatasetLike)
 @op_return(add_history=True)
 def ds_arithmetics(ds: DatasetLike.TYPE,
-                   op: str) -> xr.Dataset:
+                   op: str,
+                   monitor: Monitor = Monitor.NONE) -> xr.Dataset:
     """
     Do arithmetic operations on the given dataset by providing a list of
     arithmetic operations and the corresponding constant. The operations will
@@ -62,40 +64,45 @@ def ds_arithmetics(ds: DatasetLike.TYPE,
     :param ds: The dataset to which to apply arithmetic operations
     :param op: A comma separated list of arithmetic operations to apply
+    :param monitor: a progress monitor.
     :return: The dataset with given arithmetic operations applied
     """
     ds = DatasetLike.convert(ds)
     retset = ds
-    for item in op.split(','):
-        item = item.strip()
-        if item[0] == '+':
-            retset = retset + float(item[1:])
-        elif item[0] == '-':
-            retset = retset - float(item[1:])
-        elif item[0] == '*':
-            retset = retset * float(item[1:])
-        elif item[0] == '/':
-            retset = retset / float(item[1:])
-        elif item[:] == 'log':
-            retset = xu.log(retset)
-        elif item[:] == 'log10':
-            retset = xu.log10(retset)
-        elif item[:] == 'log2':
-            retset = xu.log2(retset)
-        elif item[:] == 'log1p':
-            retset = xu.log1p(retset)
-        elif item[:] == 'exp':
-            retset = xu.exp(retset)
-        else:
-            raise ValueError('Arithmetic operation {} not'
-                             ' implemented.'.format(item[0]))
+    with monitor.starting('Calculate result', total_work=len(op.split(','))):
+        for item in op.split(','):
+            with monitor.child(1).observing("Calculate"):
+                item = item.strip()
+                if item[0] == '+':
+                    retset = retset + float(item[1:])
+                elif item[0] == '-':
+                    retset = retset - float(item[1:])
+                elif item[0] == '*':
+                    retset = retset * float(item[1:])
+                elif item[0] == '/':
+                    retset = retset / float(item[1:])
+                elif item[:] == 'log':
+                    retset = xu.log(retset)
+                elif item[:] == 'log10':
+                    retset = xu.log10(retset)
+                elif item[:] == 'log2':
+                    retset = xu.log2(retset)
+                elif item[:] == 'log1p':
+                    retset = xu.log1p(retset)
+                elif item[:] == 'exp':
+                    retset = xu.exp(retset)
+                else:
+                    raise ValueError('Arithmetic operation {} not'
+                                     ' implemented.'.format(item[0]))

     return retset


 @op(tags=['arithmetic'], version='1.0')
 @op_return(add_history=True)
-def diff(ds: xr.Dataset, ds2: xr.Dataset) -> xr.Dataset:
+def diff(ds: xr.Dataset,
+         ds2: xr.Dataset,
+         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
     """
     Calculate the difference of two datasets (ds - ds2). This is done by
     matching variable names in the two datasets against each other and taking
@@ -111,6 +118,7 @@ def diff(ds: xr.Dataset, ds2: xr.Dataset) -> xr.Dataset:
     :param ds: The minuend dataset
     :param ds2: The subtrahend dataset
+    :param monitor: a progress monitor.
     :return: The difference dataset
     """
     try:
@@ -143,4 +151,7 @@ def diff(ds: xr.Dataset, ds2: xr.Dataset) -> xr.Dataset:
         else:
             raise TypeError(str(e))

-    return ds - ds2
+    with monitor.observing("Subtract datasets"):
+        diff = ds - ds2
+
+    return diff
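
A short usage sketch for the op-string format documented above (not part of the commit): items are applied left to right, and after this change each comma-separated item is reported to the monitor as one unit of work. The dataset and its values are invented.

import numpy as np
import xarray as xr

from cate.ops.arithmetics import ds_arithmetics

# A dummy dataset with a single variable.
ds = xr.Dataset({'precip': (['time'], np.array([1.2, 0.4, 3.7]))},
                coords={'time': np.arange(3)})

# Apply two operations in order: add 5, then take the base-10 logarithm.
# With the default Monitor.NONE the per-item progress reports are discarded.
result = ds_arithmetics(ds, '+5, log10')
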
104 changes: 61 additions & 43 deletions cate/ops/correlation.py
@@ -38,6 +38,7 @@

 from cate.core.op import op, op_input, op_return
 from cate.core.types import VarName, DatasetLike
+from cate.util import Monitor

 from cate.ops.normalize import adjust_spatial_attrs

@@ -50,7 +51,8 @@
 def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
                                ds_y: DatasetLike.TYPE,
                                var_x: VarName.TYPE,
-                               var_y: VarName.TYPE) -> pd.DataFrame:
+                               var_y: VarName.TYPE,
+                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
     """
     Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.
@@ -68,6 +70,7 @@ def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
     :param ds_y: The 'y' dataset
     :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
     :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
+    :param monitor: a progress monitor.
     :returns: {'corr_coef': correlation coefficient, 'p_value': probability value}
     """
     ds_x = DatasetLike.convert(ds_x)
@@ -94,7 +97,9 @@ def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
         raise ValueError('The length of the time dimension should not be less'
                          ' than three to run the calculation.')

-    cc, pv = pearsonr(array_x.values, array_y.values)
+    with monitor.observing("Calculate Pearson correlation"):
+        cc, pv = pearsonr(array_x.values, array_y.values)
+
     return pd.DataFrame({'corr_coef': [cc], 'p_value': [pv]})


@@ -107,7 +112,8 @@ def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
 def pearson_correlation(ds_x: DatasetLike.TYPE,
                         ds_y: DatasetLike.TYPE,
                         var_x: VarName.TYPE,
-                        var_y: VarName.TYPE) -> xr.Dataset:
+                        var_y: VarName.TYPE,
+                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
     """
     Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.
@@ -137,6 +143,7 @@ def pearson_correlation(ds_x: DatasetLike.TYPE,
     :param ds_y: The 'y' dataset
     :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
     :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
+    :param monitor: a progress monitor.
     :returns: a dataset containing a map of correlation coefficients and p_values
     """
     ds_x = DatasetLike.convert(ds_x)
@@ -189,13 +196,13 @@ def pearson_correlation(ds_x: DatasetLike.TYPE,
                          ' than three to run the calculation.')

     # Do pixel by pixel correlation
-    retset = _pearsonr(array_x, array_y)
+    retset = _pearsonr(array_x, array_y, monitor)
     retset.attrs['Cate_Description'] = 'Correlation between {} {}'.format(var_y, var_x)

     return adjust_spatial_attrs(retset)


-def _pearsonr(x: xr.DataArray, y: xr.DataArray) -> xr.Dataset:
+def _pearsonr(x: xr.DataArray, y: xr.DataArray, monitor: Monitor) -> xr.Dataset:
     """
     Calculates Pearson correlation coefficients and p-values for testing
     non-correlation of lon/lat/time xarray datasets for each lon/lat point.
@@ -218,49 +225,60 @@ def _pearsonr(x: xr.DataArray, y: xr.DataArray) -> xr.Dataset:
     :param x: lon/lat/time xr.DataArray
     :param y: xr.DataArray of the same spatiotemporal extents and resolution as
         x.
+    :param monitor: Monitor to use for monitoring the calculation
     :return: A dataset containing the correlation coefficients and p_values on
         the lon/lat grid of x and y.
     References
     ----------
     http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation
     """
-    n = len(x['time'])
-
-    xm, ym = x - x.mean(dim='time'), y - y.mean(dim='time')
-    xm_ym = xm * ym
-    r_num = xm_ym.sum(dim='time')
-    xm_squared = xr.ufuncs.square(xm)
-    ym_squared = xr.ufuncs.square(ym)
-    r_den = xr.ufuncs.sqrt(xm_squared.sum(dim='time') *
-                           ym_squared.sum(dim='time'))
-    r_den = r_den.where(r_den != 0)
-    r = r_num / r_den
-
-    # Presumably, if abs(r) > 1, then it is only some small artifact of floating
-    # point arithmetic.
-    # At this point r should be a lon/lat dataArray, so it should be safe to
-    # load it in memory explicitly. This may take time as it will kick-start
-    # deferred processing.
-    # Comparing with NaN produces warnings that can be safely ignored
-    default_warning_settings = np.seterr(invalid='ignore')
-    r.values[r.values < -1.0] = -1.0
-    r.values[r.values > 1.0] = 1.0
-    np.seterr(**default_warning_settings)
-    r.attrs = {'description': 'Correlation coefficients between'
-                              ' {} and {}.'.format(x.name, y.name)}
-
-    df = n - 2
-    t_squared = xr.ufuncs.square(r) * (df / ((1.0 - r.where(r != 1)) *
-                                             (1.0 + r.where(r != -1))))
-    prob = df / (df + t_squared)
-    prob.values = betainc(0.5 * df, 0.5, prob.values)
-    prob.attrs = {'description': 'Rough indicator of probability of an'
-                                 ' uncorrelated system producing datasets that have a Pearson'
-                                 ' correlation at least as extreme as the one computed from'
-                                 ' these datsets. Not entirely reliable, but reasonable for'
-                                 ' datasets larger than 500 or so.'}
-
-    retset = xr.Dataset({'corr_coef': r,
-                         'p_value': prob})
+    with monitor.starting("Calculate Pearson correlation", total_work=6):
+        n = len(x['time'])
+
+        xm, ym = x - x.mean(dim='time'), y - y.mean(dim='time')
+        xm_ym = xm * ym
+        r_num = xm_ym.sum(dim='time')
+        xm_squared = xr.ufuncs.square(xm)
+        ym_squared = xr.ufuncs.square(ym)
+        r_den = xr.ufuncs.sqrt(xm_squared.sum(dim='time') *
+                               ym_squared.sum(dim='time'))
+        r_den = r_den.where(r_den != 0)
+        r = r_num / r_den
+
+        # Presumably, if abs(r) > 1, then it is only some small artifact of floating
+        # point arithmetic.
+        # At this point r should be a lon/lat dataArray, so it should be safe to
+        # load it in memory explicitly. This may take time as it will kick-start
+        # deferred processing.
+        # Comparing with NaN produces warnings that can be safely ignored
+        default_warning_settings = np.seterr(invalid='ignore')
+        with monitor.child(1).observing("task 1"):
+            negativ_r = r.values < -1.0
+        with monitor.child(1).observing("task 2"):
+            r.values[negativ_r] = -1.0
+        with monitor.child(1).observing("task 3"):
+            positiv_r = r.values > 1.0
+        with monitor.child(1).observing("task 4"):
+            r.values[positiv_r] = 1.0
+        np.seterr(**default_warning_settings)
+        r.attrs = {'description': 'Correlation coefficients between'
+                                  ' {} and {}.'.format(x.name, y.name)}
+
+        df = n - 2
+        t_squared = xr.ufuncs.square(r) * (df / ((1.0 - r.where(r != 1)) *
+                                                 (1.0 + r.where(r != -1))))
+        prob = df / (df + t_squared)
+        with monitor.child(1).observing("task 5"):
+            prob_values_in = prob.values
+        with monitor.child(1).observing("task 6"):
+            prob.values = betainc(0.5 * df, 0.5, prob_values_in)
+        prob.attrs = {'description': 'Rough indicator of probability of an'
+                                     ' uncorrelated system producing datasets that have a Pearson'
+                                     ' correlation at least as extreme as the one computed from'
+                                     ' these datsets. Not entirely reliable, but reasonable for'
+                                     ' datasets larger than 500 or so.'}
+
+        retset = xr.Dataset({'corr_coef': r,
+                             'p_value': prob})
     return retset
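
And a matching sketch for the scalar correlation op (again not part of the commit): both inputs must share a time axis with at least three steps, and the result is a one-row DataFrame with corr_coef and p_value columns. The variable names and random data are invented; passing Monitor.NONE explicitly just makes the new optional argument visible.

import numpy as np
import pandas as pd
import xarray as xr

from cate.ops.correlation import pearson_correlation_scalar
from cate.util import Monitor

# Two dummy single-variable time series on the same monthly time axis.
time = pd.date_range('2000-01-01', periods=12, freq='MS')
ds_x = xr.Dataset({'sst': ('time', np.random.rand(12))}, coords={'time': time})
ds_y = xr.Dataset({'soil_moisture': ('time', np.random.rand(12))}, coords={'time': time})

# Returns a one-row DataFrame with 'corr_coef' and 'p_value'.
result = pearson_correlation_scalar(ds_x, ds_y, 'sst', 'soil_moisture',
                                    monitor=Monitor.NONE)
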