Add cols parameter to temps_arr_by_freq for selecting cols by label

nickpowersys · Sep 16, 2016 · 9c33c28 · 9c33c28
1 parent deec459
commit 9c33c28
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 10 deletions.
diff --git a/caar/histsummary.py b/caar/histsummary.py
@@ -436,6 +436,14 @@ def _get_time_label_of_data(df):
     return None
 
 
+def _get_time_column_of_data(df):
+    """Return the positional index of the time column among df's columns.
+
+    The label to look for is obtained from _get_time_label_of_data(df);
+    the position of the first column carrying that label is returned.
+
+    Args:
+        df (pandas DataFrame): DataFrame whose columns are searched.
+
+    Returns:
+        int: Zero-based position of the time column in df.columns.
+
+    Raises:
+        ValueError: If no column label equals the time label (this
+            includes the case in which df has no columns at all).
+    """
+    time_label = _get_time_label_of_data(df)
+    for i, label in enumerate(df.columns):
+        if label == time_label:
+            return i
+    # Falling out of the loop previously returned the index of the last
+    # column (or failed on an unbound ``i`` for a frame without columns),
+    # which silently selected the wrong data. Fail loudly instead.
+    raise ValueError('No column in the DataFrame matches the time label '
+                     '{!r}.'.format(time_label))
+
+
 def _get_label_of_first_data_column(df):
     """Return the label of the leftmost column of the DataFrame df."""
     leftmost_label = df.columns[0]
     return leftmost_label
 
@@ -447,6 +455,13 @@ def _get_labels_of_data_columns(df):
     return col_labels
 
 
+def _get_column_of_data_label(df, label):
+    """Return the positional index of the column in df that has the label.
+
+    Args:
+        df (pandas DataFrame): DataFrame whose columns are searched.
+
+        label: Column heading to look for. The first column whose label
+            compares equal to it is used.
+
+    Returns:
+        int: Zero-based position of the matching column in df.columns.
+
+    Raises:
+        KeyError: If no column of df has the given label (this includes
+            the case in which df has no columns at all).
+    """
+    for i, col_label in enumerate(df.columns):
+        if col_label == label:
+            return i
+    # Falling out of the loop previously returned the index of the last
+    # column (or failed on an unbound ``i`` for a frame without columns),
+    # so a misspelled label silently selected the wrong data.
+    raise KeyError(label)
+
+
 def squared_avg_daily_data_points_per_id(df):
     """ Returns DataFrame grouped by the primary id (ThermostatId or
     LocationId) and by day. The value column has the count of data points
@@ -504,13 +519,13 @@ def number_of_days(df):
     return (last_day - first_day)/np.timedelta64(1, 'D')
 
 
-def start_of_first_full_day_df(cycle_df):
+def start_of_first_full_day_df(df):
     """"Returns datetime.datetime value of the very beginning of the first
     full day for which data is given in a pandas DataFrame. The DataFrame
     must have a MultiIndex in which the time level of the index
     contains timestamps."""
     time_index_level = _get_time_level_of_df_multiindex(df)
-    earliest_timestamp = (cycle_df
+    earliest_timestamp = (df
                           .index
                           .get_level_values(level=time_index_level)
                           .min())

diff --git a/caar/timeseries.py b/caar/timeseries.py
@@ -2,7 +2,9 @@
 import datetime as dt
 import numpy as np
 import pandas as pd
-from caar.histsummary import location_id_of_thermo, _get_time_level_of_df_multiindex
+from caar.histsummary import location_id_of_thermo, \
+    _get_time_level_of_df_multiindex, _get_time_column_of_data, \
+    _get_column_of_data_label
 
 from future import standard_library
 standard_library.install_aliases()
@@ -94,7 +96,8 @@ def on_off_status(df, id, start, end, freq='1min'):
     starts_by_freq = (raw_record_starts
                       .snap(freq=freq)
                       .tolist())
-    raw_record_ends = pd.DatetimeIndex(records.iloc[:, 0]
+    time_column = _get_time_column_of_data(df)
+    raw_record_ends = pd.DatetimeIndex(records.iloc[:, time_column]
                                        .tolist())
     record_ends_by_freq = (raw_record_ends
                            .snap(freq=freq)
@@ -107,7 +110,7 @@ def on_off_status(df, id, start, end, freq='1min'):
     return status_in_intervals
 
 
-def temps_arr_by_freq(df, id, start, end, freq='1min', actuals_only=False):
+def temps_arr_by_freq(df, id, start, end, cols=None, freq='1min', actuals_only=False):
     """Returns NumPy array containing timestamps ('times') and temperatures at the specified frequency. If *actuals_only* is True, only the observed temperatures will be returned in an array. Otherwise, by default, intervals without observations are filled with zeros.
 
     Args:
@@ -119,6 +122,8 @@ def temps_arr_by_freq(df, id, start, end, freq='1min', actuals_only=False):
 
         end (datetime.datetime): Last interval to include in output array.
 
+        cols (Optional[str or list of str]): Column heading/label, or list of labels, of the column(s) that should be in the output (array) as data. By default, only the first data column on the left is in the output, while the others are left out.
+
         freq (str): Frequency of intervals in output, specified in format recognized by pandas.
 
         actuals_only (Boolean): If True, return only actual observations. If False, return array with zeros for intervals without observations.
@@ -145,7 +150,15 @@ def temps_arr_by_freq(df, id, start, end, freq='1min', actuals_only=False):
     timestamps_by_freq = (timestamps
                           .snap(freq=freq)
                           .tolist())
-    temps_by_minute = (records.iloc[:, 0]
+    if cols is None:
+        data_cols = 0
+    elif isinstance(cols, str):
+        data_cols = _get_column_of_data_label(cols)
+    elif isinstance(cols, list):
+        data_cols = []
+        for label in cols:
+            data_cols.append(_get_column_of_data_label(df, label))
+    temps_by_minute = (records.iloc[:, data_cols]
                        .tolist())
     # Populate the array with temperatures.
     for i in range(len(records)):

diff --git a/conf.py b/conf.py
@@ -77,9 +77,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '3.0-beta'
+version = '3.1-beta'
 # The full version, including alpha/beta/rc tags.
-release = '3.0.1-beta'
+release = '3.1.0-beta'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -143,7 +143,7 @@
 
 # The name for this set of Sphinx documents.
 # "<project> v<release> documentation" by default.
-html_title = 'caar v3.0.0-beta'
+html_title = 'caar v3.1.0-beta'
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
 #html_short_title = None

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name='caar',
-    version='3.0.1-beta',
+    version='3.1.0-beta',
     url='http://github.com/nickpowersys/CaaR/',
     license='BSD 3-Clause License',
     author='Nicholas A. Brown',