
Commit

Fixed daily_cycle_sensor_and_geospatial_obs_counts
nickpowersys committed Dec 3, 2016
1 parent fee220e commit 98b984d
Showing 4 changed files with 37 additions and 104 deletions.
16 changes: 8 additions & 8 deletions caar/config.ini
@@ -19,11 +19,11 @@ CYCLES_PICKLE_FILE = test_TX_cycles.pickle
SENSOR_PICKLE_FILE = test_TX_sensors.pickle
GEOSPATIAL_PICKLE_FILE = test_TX_geospatial.pickle
ALL_STATES_CYCLES_PICKLED_OUT = all_states_cycles.pickle
ALL_STATES_INSIDE_PICKLED_OUT = all_states_sensors.pickle
ALL_STATES_OUTSIDE_PICKLED_OUT = all_states_geospatial.pickle
ALL_STATES_SENSOR_OBS_PICKLED_OUT = all_states_sensors.pickle
ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT = all_states_geospatial.pickle
ALL_STATES_CYCLES_PICKLED = test_all_states_cycles.pickle
ALL_STATES_INSIDE_PICKLED = test_all_states_sensors.pickle
ALL_STATES_OUTSIDE_PICKLED = test_all_states_geospatial.pickle
ALL_STATES_SENSOR_OBS_PICKLED = test_all_states_sensors.pickle
ALL_STATES_GEOSPATIAL_OBS_PICKLED = test_all_states_geospatial.pickle

[test_pickle_files_py2] # For Python 2.7
CYCLES_PICKLE_FILE_OUT = TX_cycles_py27.pickle
@@ -33,11 +33,11 @@ CYCLES_PICKLE_FILE = test_TX_cycles_py27.pickle
SENSOR_PICKLE_FILE = test_TX_sensors_py27.pickle
GEOSPATIAL_PICKLE_FILE = test_TX_geospatial_py27.pickle
ALL_STATES_CYCLES_PICKLED_OUT = all_states_cycles_py27.pickle
ALL_STATES_INSIDE_PICKLED_OUT = all_states_sensors_py27.pickle
ALL_STATES_OUTSIDE_PICKLED_OUT = all_states_geospatial_py27.pickle
ALL_STATES_SENSOR_OBS_PICKLED_OUT = all_states_sensors_py27.pickle
ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT = all_states_geospatial_py27.pickle
ALL_STATES_CYCLES_PICKLED = test_all_states_cycles_py27.pickle
ALL_STATES_INSIDE_PICKLED = test_all_states_sensors_py27.pickle
ALL_STATES_OUTSIDE_PICKLED = test_all_states_geospatial_py27.pickle
ALL_STATES_SENSOR_OBS_PICKLED = test_all_states_sensors_py27.pickle
ALL_STATES_GEOSPATIAL_OBS_PICKLED = test_all_states_geospatial_py27.pickle

[test_ids_and_states]
SENSOR_ID1 = 92
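Note: with the options above renamed from INSIDE/OUTSIDE to SENSOR_OBS/GEOSPATIAL_OBS, any code that still asks configparser for the old names will raise configparser.NoOptionError. Below is a minimal sketch of reading one renamed option back; the section name test_pickle_files and the relative paths are assumptions for illustration only, not taken from this diff.

import configparser
import os

parser = configparser.ConfigParser()
parser.read(os.path.join('caar', 'config.ini'))

# Assumed section name for illustration; the real section header sits above
# the lines shown in the hunk.
section = 'test_pickle_files'

# The new key resolves normally; the old key (ALL_STATES_INSIDE_PICKLED_OUT)
# would now raise configparser.NoOptionError.
sensor_obs_pickle = os.path.join(
    'tests', parser.get(section, 'ALL_STATES_SENSOR_OBS_PICKLED_OUT'))
print(sensor_obs_pickle)  # e.g. tests/all_states_sensors.pickle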
12 changes: 6 additions & 6 deletions caar/configparser_read.py
@@ -142,8 +142,8 @@

options_vals = ['CYCLES_PICKLE_FILE_OUT', 'SENSOR_PICKLE_FILE_OUT', 'GEOSPATIAL_PICKLE_FILE_OUT',
'CYCLES_PICKLE_FILE', 'SENSOR_PICKLE_FILE', 'GEOSPATIAL_PICKLE_FILE',
'ALL_STATES_CYCLES_PICKLED_OUT', 'ALL_STATES_INSIDE_PICKLED_OUT', 'ALL_STATES_OUTSIDE_PICKLED_OUT',
'ALL_STATES_CYCLES_PICKLED', 'ALL_STATES_INSIDE_PICKLED', 'ALL_STATES_OUTSIDE_PICKLED']
'ALL_STATES_CYCLES_PICKLED_OUT', 'ALL_STATES_SENSOR_OBS_PICKLED_OUT', 'ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT',
'ALL_STATES_CYCLES_PICKLED', 'ALL_STATES_SENSOR_OBS_PICKLED', 'ALL_STATES_GEOSPATIAL_OBS_PICKLED']

for option_val in options_vals:
vars()[option_val] = os.path.join(TEST_DIR, parser.get(test_pickle_section, option_val))
@@ -157,9 +157,9 @@
GEOSPATIAL_PICKLE_FILE = vars()['GEOSPATIAL_PICKLE_FILE']

ALL_STATES_CYCLES_PICKLED_OUT = vars()['ALL_STATES_CYCLES_PICKLED_OUT']
ALL_STATES_INSIDE_PICKLED_OUT = vars()['ALL_STATES_INSIDE_PICKLED_OUT']
ALL_STATES_OUTSIDE_PICKLED_OUT = vars()['ALL_STATES_OUTSIDE_PICKLED_OUT']
ALL_STATES_SENSOR_OBS_PICKLED_OUT = vars()['ALL_STATES_SENSOR_OBS_PICKLED_OUT']
ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT = vars()['ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT']

ALL_STATES_CYCLES_PICKLED = vars()['ALL_STATES_CYCLES_PICKLED']
ALL_STATES_INSIDE_PICKLED = vars()['ALL_STATES_INSIDE_PICKLED']
ALL_STATES_OUTSIDE_PICKLED = vars()['ALL_STATES_OUTSIDE_PICKLED']
ALL_STATES_SENSOR_OBS_PICKLED = vars()['ALL_STATES_SENSOR_OBS_PICKLED']
ALL_STATES_GEOSPATIAL_OBS_PICKLED = vars()['ALL_STATES_GEOSPATIAL_OBS_PICKLED']
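The loop above injects each resolved path into the module namespace through vars(); the sketch below shows an equivalent, more explicit pattern using a plain dict, only to clarify what that loop produces. The config path, test directory, and section name here are placeholders, not values from this repository.

import configparser
import os

TEST_DIR = 'tests'                         # placeholder
test_pickle_section = 'test_pickle_files'  # placeholder section name

parser = configparser.ConfigParser()
parser.read(os.path.join('caar', 'config.ini'))

options_vals = ['ALL_STATES_CYCLES_PICKLED_OUT', 'ALL_STATES_SENSOR_OBS_PICKLED_OUT',
                'ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT', 'ALL_STATES_CYCLES_PICKLED',
                'ALL_STATES_SENSOR_OBS_PICKLED', 'ALL_STATES_GEOSPATIAL_OBS_PICKLED']

# One resolved path per renamed option, keyed by option name.
pickle_paths = {opt: os.path.join(TEST_DIR, parser.get(test_pickle_section, opt))
                for opt in options_vals}

ALL_STATES_SENSOR_OBS_PICKLED_OUT = pickle_paths['ALL_STATES_SENSOR_OBS_PICKLED_OUT']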
74 changes: 3 additions & 71 deletions caar/histsummary.py
@@ -65,7 +65,7 @@ def consecutive_days_of_observations(id, devices_file, cycles_df,
'You may want to confirm whether the observations '
'collected covered the entire day(s).')
elif not include_first_and_last_days:
first_day_in_streak = obs_counts.index[1] # Second day because first may be partial
first_day_in_streak = obs_counts.index[1] # Second day because first may be partial
last_day = obs_counts.index[-2]
else:
first_day_in_streak = obs_counts.index[0]
@@ -97,72 +97,6 @@
'Consecutive days'])
return streaks_df

# possible_last_day = first_day + pd.Timedelta(days=1)
# last_day = first_day
# # Outer loop
# while last_day <= obs_counts.index[-1]:
# # event triggers that possible is not in index.
#
# if possible_last_day in obs_counts.index:
# last_day = possible_last_day
# possible_last_day += possible_last_day + pd.Timedelta(days=1)
# possible_last_day = first_day + pd.Timedelta(days=1)
# if first_day + pd.Timedelta(days=1) in obs_counts.index:
# possible_last_day = first_day + pd.Timedelta(days=1)
# else:
# last_day = first_day
# possible_last_day = first_day + pd.Timedelta(days=1)
# while possible_last_day in obs_counts.index:
# possible_last_day += pd.Timedelta(days=1)
# # loop until the possible_last_day is not in the index
# last_day = possible_last_day
# total_days = (last_day - first_day + pd.Timedelta(days=1)) / pd.Timedelta(days=1)
# first_day_dt = dt.date(first_day.year, first_day.month,
# first_day.day)
# last_day_dt = dt.date(last_day.year, last_day.month,
# last_day.day)
# streaks.append((id, first_day_dt, last_day_dt, total_days))
# else:
# first_day += pd.Timedelta(days=1)
#
# prev_day = day0
# next_day = day0 + pd.Timedelta(days=1)
# while next_day in obs_counts.index:
# prev_day = next_day
# next_day = day0 + pd.Timedelta(days=1)
# last_day = prev_day
#
# if
# if next_day in obs_counts.index:
# #day1 = day0 + pd.Timedelta(days=1)
#
# # streak_days += 1
# # obs_index = obs_counts.index[-2] # debug only
# while day1 <= obs_counts.index[-2]: # Next to last (last may be partial)
# if day1 in obs_counts.index:
# streak_days += 1
# day1 += pd.Timedelta(days=1)
# else:
#
# if streak_days >= 3: # Ignore first and last day (may be partial)
# first_day = day0 + pd.Timedelta(days=1)
# last_day = day1 - pd.Timedelta(days=1)
# total_days = (last_day - first_day + pd.Timedelta(days=1)) / pd.Timedelta(days=1)
# first_day_dt = dt.date(first_day.year, first_day.month,
# first_day.day)
# last_day_dt = dt.date(last_day.year, last_day.month,
# last_day.day)
# streaks.append((id, first_day_dt, last_day_dt, total_days))
# streak_days = 0
# day0 = day1 + pd.Timedelta(days=1)
# day1 = day0 + pd.Timedelta(days=1)
# streaks_arr = np.array(streaks)
# streaks_arr[streaks_arr[:, 1].argsort()]
# streaks_df = pd.DataFrame(data=streaks_arr,
# columns=['ID', 'First day', 'Last day',
# 'Consecutive days'])
# return streaks_df


def daily_cycle_sensor_and_geospatial_obs_counts(id, devices_file, cycles_df, sensors_df,
geospatial_df=None):
@@ -202,15 +136,13 @@
for df in dfs)
else:
cycles, sensor = (df.set_index(df.index.droplevel()) for df in dfs)
cycles_time = _get_time_index_column_label(cycles_df)
cycles_sensors = pd.merge(cycles, sensor, left_index=cycles_time,
cycles_sensors = pd.merge(cycles, sensor, left_index=True,
right_index=True, how='inner')
cycle_end_time = _get_time_label_of_data(cycles_df)
cycles_sensors.rename(columns={cycle_end_time: 'Cycles_obs'})
if geospatial_data:
geospatial_time_heading = _get_time_index_column_label(geospatial_df)
return pd.merge(cycles_sensors, geospatial, left_index=True,
right_index=geospatial_time_heading)
right_index=True)
else:
return cycles_sensors

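The core of this fix: pandas.merge expects booleans for left_index and right_index (meaning "join on the index"), and the change above switches from passing time-column labels to passing True for both frames. A small self-contained sketch of the corrected index-aligned inner join follows; the timestamps and column names are invented for illustration and are not taken from caar.

import pandas as pd

# Toy frames sharing a time index.
times = pd.date_range('2012-06-18 21:00', periods=3, freq='H')
cycles = pd.DataFrame({'End_time': [1, 2, 1]}, index=times)
sensors = pd.DataFrame({'Degrees': [76.0, 75.5, 75.0]}, index=times)

# Booleans tell merge to align the two frames on their indexes.
cycles_sensors = pd.merge(cycles, sensors, left_index=True,
                          right_index=True, how='inner')

# rename returns a new DataFrame unless inplace=True, so keep the result.
cycles_sensors = cycles_sensors.rename(columns={'End_time': 'Cycles_obs'})
print(cycles_sensors)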
39 changes: 20 additions & 19 deletions tests/test_caar.py
@@ -18,7 +18,7 @@
GEOSPATIAL_PICKLE_FILE_OUT, GEOSPATIAL_PICKLE_FILE, LOCATION_IDS, \
TEST_SENSORS_FILE, STATE, TEST_POSTAL_FILE, CYCLE_TYPE_COOL, \
TEST_SENSOR_OBS_FILE, TEST_GEOSPATIAL_OBS_FILE, ALL_STATES_CYCLES_PICKLED_OUT, \
ALL_STATES_INSIDE_PICKLED_OUT, ALL_STATES_OUTSIDE_PICKLED_OUT, SENSOR_ID1, \
ALL_STATES_SENSOR_OBS_PICKLED_OUT, ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT, SENSOR_ID1, \
LOCATION_ID1

standard_library.install_aliases()
@@ -78,7 +78,7 @@ def postal_fixture():
def state_fixture():
return [STATE]

@slow

@pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto",
[(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE,
TEST_POSTAL_FILE, CYCLE_TYPE_COOL, 'cycles'),
@@ -100,7 +100,7 @@ def test_select_clean_auto(data_file, states, sensors, postal, cycle, auto):
assert len(clean_dict) > 0


@slow

@pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto",
[(TEST_CYCLES_FILE, None, None, None, CYCLE_TYPE_COOL,
'cycles'),
@@ -116,7 +116,7 @@ def test_col_meta_auto(data_file, states, sensors, postal, cycle, auto):
assert len(col_meta) > 0


@slow

@pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto",
[(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE,
TEST_POSTAL_FILE, CYCLE_TYPE_COOL, None),
@@ -136,7 +136,7 @@ def test_select_clean(data_file, states, sensors, postal, cycle, auto):
assert len(clean_dict) > 0


@slow

@pytest.mark.parametrize("tempdir, data_file, cycle, states_to_clean, "
"expected_path, sensors, postal, auto, encoding",
[(tmpdir(), TEST_CYCLES_FILE, CYCLE_TYPE_COOL, STATE, CYCLES_PICKLE_FILE_OUT,
@@ -145,11 +145,11 @@ def test_select_clean(data_file, states, sensors, postal, cycle, auto):
None, None, 'cycles', 'UTF-8'),
(tmpdir(), TEST_SENSOR_OBS_FILE, None, STATE, SENSOR_PICKLE_FILE_OUT,
TEST_SENSORS_FILE, TEST_POSTAL_FILE, 'sensors', 'UTF-8'),
(tmpdir(), TEST_SENSOR_OBS_FILE, None, None, ALL_STATES_INSIDE_PICKLED_OUT,
(tmpdir(), TEST_SENSOR_OBS_FILE, None, None, ALL_STATES_SENSOR_OBS_PICKLED_OUT,
None, None, 'sensors', 'UTF-8'),
(tmpdir(), TEST_GEOSPATIAL_OBS_FILE, None, STATE, GEOSPATIAL_PICKLE_FILE_OUT,
TEST_SENSORS_FILE, TEST_POSTAL_FILE, 'geospatial', 'UTF-8'),
(tmpdir(), TEST_GEOSPATIAL_OBS_FILE, None, None, ALL_STATES_OUTSIDE_PICKLED_OUT,
(tmpdir(), TEST_GEOSPATIAL_OBS_FILE, None, None, ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT,
None, None, 'geospatial', 'UTF-8')])
def test_pickle_cycles_inside_outside(tempdir, data_file, cycle, states_to_clean, expected_path,
sensors, postal, auto, encoding):
@@ -161,7 +161,7 @@ def test_pickle_cycles_inside_outside(tempdir, data_file, cycle, states_to_clean
assert pickle_file == os.path.basename(expected_path)


@slow

@pytest.mark.parametrize("pickle_file, df_creation_func, id_type, ids",
[(CYCLES_PICKLE_FILE, hi.create_cycles_df,
'device_ids', [SENSOR_ID1]),
@@ -179,7 +179,7 @@ def test_df_creation(pickle_file, df_creation_func, id_type, ids):
df = df_creation_func(pickle_file, **kwargs)
assert isinstance(df, pd.DataFrame)

@slow

@pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto, df_creation_func, id_type, ids",
[(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE,
TEST_POSTAL_FILE, CYCLE_TYPE_COOL, 'cycles', hi.create_cycles_df,
@@ -210,7 +210,7 @@ def test_df_creation_after_dict(data_file, states, sensors, postal, cycle, auto,
assert isinstance(df, pd.DataFrame)


@slow

@pytest.mark.parametrize("data_file, states, sensors, postal, cycle, df_creation_func, id_type, ids",
[(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE,
TEST_POSTAL_FILE, CYCLE_TYPE_COOL, hi.create_cycles_df,
@@ -239,7 +239,7 @@ def test_df_creation_after_fixed_dict(data_file, states, sensors, postal, cycle,
df = df_creation_func(clean_dict, **kwargs)
assert isinstance(df, pd.DataFrame)

@slow

@pytest.mark.parametrize("df_fixture, id, start, end, freq",
[(cycle_df_fixture(), SENSOR_ID1, dt.datetime(2012, 6, 18, 21, 0, 0),
dt.datetime(2012, 6, 18, 23, 0, 0), '1min30s'),
@@ -255,7 +255,7 @@ def test_on_off_status_by_interval(df_fixture, id, start, end, freq):
assert len(dt_intervals) > 0
assert len(on_off) == len(dt_intervals)

@slow

@pytest.mark.parametrize("df_fixture, id, start, end, freq",
[(sensor_df_fixture(), SENSOR_ID1, dt.datetime(2011, 8, 4, 21, 0, 0),
dt.datetime(2011, 8, 4, 23, 0, 0), '1min30s'),
@@ -279,7 +279,7 @@ def test_temps_by_interval(df_fixture, id, start, end, freq):
assert len(temps[0]) > 0


@slow

@pytest.mark.parametrize("thermo_id, start, end, freq, cycle_df, inside_df, outside_df, thermo_file",
[(SENSOR_ID1, dt.datetime(2011, 8, 4, 21, 0, 0),
dt.datetime(2011, 8, 4, 23, 59, 0), '1min',
@@ -299,12 +299,13 @@ def test_single_day_cycling_and_temps(thermo_id, start, end, freq, cycle_df,
assert single_day_arr[1].shape[1] == 3


@pytest.mark.parametrize("id, devices_file, cycles_df, sensors_df, geospatial_df",
[(92, TEST_SENSORS_FILE, cycle_df_fixture(), sensor_df_fixture(), geospatial_df_fixture())])
def test_consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df):
# dummy = 1 # just here to look at the input variables for debugging
# cyc = cycles_df # just here for debugging
obs = hs.consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df)
@pytest.mark.parametrize("id, devices_file, cycles_df, sensors_df, geospatial_df, include_first_last_days",
[(92, TEST_SENSORS_FILE, cycle_df_fixture(), sensor_df_fixture(), geospatial_df_fixture(),
False)])
def test_consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df,
include_first_last_days):
obs = hs.consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df=geospatial_df,
include_first_and_last_days=include_first_last_days)
assert isinstance(obs, pd.DataFrame)
assert len(obs) > 0

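For reference on the new include_first_last_days parameter exercised by this test, here is a tiny self-contained sketch (toy data only) of the boundary-day trimming shown in the histsummary.py hunk above: when include_first_and_last_days is False, the streak starts on the second observed day and ends on the next-to-last one, since the boundary days may be only partially covered.

import pandas as pd

# Daily observation counts for one device; the first and last days look partial.
days = pd.date_range('2012-06-18', periods=5, freq='D')
obs_counts = pd.Series([3, 24, 24, 24, 7], index=days)

include_first_and_last_days = False
if include_first_and_last_days:
    first_day_in_streak, last_day = obs_counts.index[0], obs_counts.index[-1]
else:
    first_day_in_streak = obs_counts.index[1]  # second day; first may be partial
    last_day = obs_counts.index[-2]            # next-to-last; last may be partial

print(first_day_in_streak.date(), last_day.date())  # 2012-06-19 2012-06-21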
