Skip to content

Commit

Permalink
Add cols parameter to temps_arr_by_freq for selecting cols by label
Browse files Browse the repository at this point in the history
  • Loading branch information
nickpowersys committed Sep 16, 2016
1 parent deec459 commit 9c33c28
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 10 deletions.
19 changes: 17 additions & 2 deletions caar/histsummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,14 @@ def _get_time_label_of_data(df):
return None


def _get_time_column_of_data(df):
    """Return the integer position of the time column among df's columns.

    The time column is the first column whose label equals the label
    returned by ``_get_time_label_of_data(df)``.

    Args:
        df (pandas DataFrame): DataFrame whose column labels are searched.

    Returns:
        int: Zero-based position of the matching column, suitable for
        positional selection such as ``df.iloc[:, position]``.

    Raises:
        ValueError: If no column label matches the time label (including
            the case in which the DataFrame has no columns).
    """
    time_label = _get_time_label_of_data(df)
    for i, label in enumerate(df.columns):
        if label == time_label:
            return i
    # Falling out of the loop means no column matched. Returning the last
    # index examined (the previous behavior) would silently select the
    # wrong column, so fail loudly instead.
    raise ValueError('No column matches the time label {!r}.'
                     .format(time_label))


def _get_label_of_first_data_column(df):
return df.columns[0]

Expand All @@ -447,6 +455,13 @@ def _get_labels_of_data_columns(df):
return col_labels


def _get_column_of_data_label(df, label):
for i, col_label in enumerate(df.columns):
if col_label == label:
break
return i


def squared_avg_daily_data_points_per_id(df):
""" Returns DataFrame grouped by the primary id (ThermostatId or
LocationId) and by day. The value column has the count of data points
Expand Down Expand Up @@ -504,13 +519,13 @@ def number_of_days(df):
return (last_day - first_day)/np.timedelta64(1, 'D')


def start_of_first_full_day_df(cycle_df):
def start_of_first_full_day_df(df):
""""Returns datetime.datetime value of the very beginning of the first
full day for which data is given in a pandas DataFrame. The DataFrame
must have a MultiIndex in which the time level of the index
contains timestamps."""
time_index_level = _get_time_level_of_df_multiindex(df)
earliest_timestamp = (cycle_df
earliest_timestamp = (df
.index
.get_level_values(level=time_index_level)
.min())
Expand Down
21 changes: 17 additions & 4 deletions caar/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import datetime as dt
import numpy as np
import pandas as pd
from caar.histsummary import location_id_of_thermo, _get_time_level_of_df_multiindex
from caar.histsummary import location_id_of_thermo, \
_get_time_level_of_df_multiindex, _get_time_column_of_data, \
_get_column_of_data_label

from future import standard_library
standard_library.install_aliases()
Expand Down Expand Up @@ -94,7 +96,8 @@ def on_off_status(df, id, start, end, freq='1min'):
starts_by_freq = (raw_record_starts
.snap(freq=freq)
.tolist())
raw_record_ends = pd.DatetimeIndex(records.iloc[:, 0]
time_column = _get_time_column_of_data(df)
raw_record_ends = pd.DatetimeIndex(records.iloc[:, time_column]
.tolist())
record_ends_by_freq = (raw_record_ends
.snap(freq=freq)
Expand All @@ -107,7 +110,7 @@ def on_off_status(df, id, start, end, freq='1min'):
return status_in_intervals


def temps_arr_by_freq(df, id, start, end, freq='1min', actuals_only=False):
def temps_arr_by_freq(df, id, start, end, cols=None, freq='1min', actuals_only=False):
"""Returns NumPy array containing timestamps ('times') and temperatures at the specified frequency. If *actuals_only* is True, only the observed temperatures will be returned in an array. Otherwise, by default, intervals without observations are filled with zeros.
Args:
Expand All @@ -119,6 +122,8 @@ def temps_arr_by_freq(df, id, start, end, freq='1min', actuals_only=False):
end (datetime.datetime): Last interval to include in output array.
cols (Optional[str or list of str]): Column heading/label, or list of labels, for the column(s) that should be in the output array as data. By default, only the first (leftmost) data column is in the output, and the others are left out.
freq (str): Frequency of intervals in output, specified in format recognized by pandas.
actuals_only (Boolean): If True, return only actual observations. If False, return array with zeros for intervals without observations.
Expand All @@ -145,7 +150,15 @@ def temps_arr_by_freq(df, id, start, end, freq='1min', actuals_only=False):
timestamps_by_freq = (timestamps
.snap(freq=freq)
.tolist())
temps_by_minute = (records.iloc[:, 0]
if cols is None:
data_cols = 0
elif isinstance(cols, str):
data_cols = _get_column_of_data_label(cols)
elif isinstance(cols, list):
data_cols = []
for label in cols:
data_cols.append(_get_column_of_data_label(df, label))
temps_by_minute = (records.iloc[:, data_cols]
.tolist())
# Populate the array with temperatures.
for i in range(len(records)):
Expand Down
6 changes: 3 additions & 3 deletions conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@
# built documents.
#
# The short X.Y version.
version = '3.0-beta'
version = '3.1-beta'
# The full version, including alpha/beta/rc tags.
release = '3.0.1-beta'
release = '3.1.0-beta'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down Expand Up @@ -143,7 +143,7 @@

# The name for this set of Sphinx documents.
# "<project> v<release> documentation" by default.
html_title = 'caar v3.0.0-beta'
html_title = 'caar v3.1.0-beta'

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='caar',
version='3.0.1-beta',
version='3.1.0-beta',
url='http://github.com/nickpowersys/CaaR/',
license='BSD 3-Clause License',
author='Nicholas A. Brown',
Expand Down

0 comments on commit 9c33c28

Please sign in to comment.