diff --git a/caar/config.ini b/caar/config.ini
index c00f576..e391881 100644
--- a/caar/config.ini
+++ b/caar/config.ini
@@ -19,11 +19,11 @@ CYCLES_PICKLE_FILE = test_TX_cycles.pickle
 SENSOR_PICKLE_FILE = test_TX_sensors.pickle
 GEOSPATIAL_PICKLE_FILE = test_TX_geospatial.pickle
 ALL_STATES_CYCLES_PICKLED_OUT = all_states_cycles.pickle
-ALL_STATES_INSIDE_PICKLED_OUT = all_states_sensors.pickle
-ALL_STATES_OUTSIDE_PICKLED_OUT = all_states_geospatial.pickle
+ALL_STATES_SENSOR_OBS_PICKLED_OUT = all_states_sensors.pickle
+ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT = all_states_geospatial.pickle
 ALL_STATES_CYCLES_PICKLED = test_all_states_cycles.pickle
-ALL_STATES_INSIDE_PICKLED = test_all_states_sensors.pickle
-ALL_STATES_OUTSIDE_PICKLED = test_all_states_geospatial.pickle
+ALL_STATES_SENSOR_OBS_PICKLED = test_all_states_sensors.pickle
+ALL_STATES_GEOSPATIAL_OBS_PICKLED = test_all_states_geospatial.pickle
 [test_pickle_files_py2]
 # For Python 2.7
 CYCLES_PICKLE_FILE_OUT = TX_cycles_py27.pickle
@@ -33,11 +33,11 @@ CYCLES_PICKLE_FILE = test_TX_cycles_py27.pickle
 SENSOR_PICKLE_FILE = test_TX_sensors_py27.pickle
 GEOSPATIAL_PICKLE_FILE = test_TX_geospatial_py27.pickle
 ALL_STATES_CYCLES_PICKLED_OUT = all_states_cycles_py27.pickle
-ALL_STATES_INSIDE_PICKLED_OUT = all_states_sensors_py27.pickle
-ALL_STATES_OUTSIDE_PICKLED_OUT = all_states_geospatial_py27.pickle
+ALL_STATES_SENSOR_OBS_PICKLED_OUT = all_states_sensors_py27.pickle
+ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT = all_states_geospatial_py27.pickle
 ALL_STATES_CYCLES_PICKLED = test_all_states_cycles_py27.pickle
-ALL_STATES_INSIDE_PICKLED = test_all_states_sensors_py27.pickle
-ALL_STATES_OUTSIDE_PICKLED = test_all_states_geospatial_py27.pickle
+ALL_STATES_SENSOR_OBS_PICKLED = test_all_states_sensors_py27.pickle
+ALL_STATES_GEOSPATIAL_OBS_PICKLED = test_all_states_geospatial_py27.pickle
 [test_ids_and_states]
 SENSOR_ID1 = 92
diff --git a/caar/configparser_read.py b/caar/configparser_read.py
index b30429a..6831afe 100644
--- a/caar/configparser_read.py
+++ b/caar/configparser_read.py
@@ -142,8 +142,8 @@
 
 options_vals = ['CYCLES_PICKLE_FILE_OUT', 'SENSOR_PICKLE_FILE_OUT', 'GEOSPATIAL_PICKLE_FILE_OUT',
                 'CYCLES_PICKLE_FILE', 'SENSOR_PICKLE_FILE', 'GEOSPATIAL_PICKLE_FILE',
-                'ALL_STATES_CYCLES_PICKLED_OUT', 'ALL_STATES_INSIDE_PICKLED_OUT', 'ALL_STATES_OUTSIDE_PICKLED_OUT',
-                'ALL_STATES_CYCLES_PICKLED', 'ALL_STATES_INSIDE_PICKLED', 'ALL_STATES_OUTSIDE_PICKLED']
+                'ALL_STATES_CYCLES_PICKLED_OUT', 'ALL_STATES_SENSOR_OBS_PICKLED_OUT', 'ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT',
+                'ALL_STATES_CYCLES_PICKLED', 'ALL_STATES_SENSOR_OBS_PICKLED', 'ALL_STATES_GEOSPATIAL_OBS_PICKLED']
 
 for option_val in options_vals:
     vars()[option_val] = os.path.join(TEST_DIR, parser.get(test_pickle_section, option_val))
@@ -157,9 +157,9 @@ GEOSPATIAL_PICKLE_FILE = vars()['GEOSPATIAL_PICKLE_FILE']
 
 ALL_STATES_CYCLES_PICKLED_OUT = vars()['ALL_STATES_CYCLES_PICKLED_OUT']
-ALL_STATES_INSIDE_PICKLED_OUT = vars()['ALL_STATES_INSIDE_PICKLED_OUT']
-ALL_STATES_OUTSIDE_PICKLED_OUT = vars()['ALL_STATES_OUTSIDE_PICKLED_OUT']
+ALL_STATES_SENSOR_OBS_PICKLED_OUT = vars()['ALL_STATES_SENSOR_OBS_PICKLED_OUT']
+ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT = vars()['ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT']
 ALL_STATES_CYCLES_PICKLED = vars()['ALL_STATES_CYCLES_PICKLED']
-ALL_STATES_INSIDE_PICKLED = vars()['ALL_STATES_INSIDE_PICKLED']
-ALL_STATES_OUTSIDE_PICKLED = vars()['ALL_STATES_OUTSIDE_PICKLED']
+ALL_STATES_SENSOR_OBS_PICKLED = vars()['ALL_STATES_SENSOR_OBS_PICKLED']
+ALL_STATES_GEOSPATIAL_OBS_PICKLED = vars()['ALL_STATES_GEOSPATIAL_OBS_PICKLED']
diff --git a/caar/histsummary.py b/caar/histsummary.py
index 5f0753c..b3e0de3 100644
--- a/caar/histsummary.py
+++ b/caar/histsummary.py
@@ -65,7 +65,7 @@ def consecutive_days_of_observations(id, devices_file, cycles_df,
                       'You may want to confirm whether the observations '
                       'collected covered the entire day(s).')
     elif not include_first_and_last_days:
-        first_day_in_streak = obs_counts.index[1] # Second day because first may be partial
+        first_day_in_streak = obs_counts.index[1]  # Second day because first may be partial
         last_day = obs_counts.index[-2]
     else:
         first_day_in_streak = obs_counts.index[0]
@@ -97,72 +97,6 @@ def consecutive_days_of_observations(id, devices_file, cycles_df,
                                          'Consecutive days'])
     return streaks_df
 
-    # possible_last_day = first_day + pd.Timedelta(days=1)
-    # last_day = first_day
-    # # Outer loop
-    # while last_day <= obs_counts.index[-1]:
-    #     # event triggers that possible is not in index.
-    #
-    #     if possible_last_day in obs_counts.index:
-    #         last_day = possible_last_day
-    #         possible_last_day += possible_last_day + pd.Timedelta(days=1)
-    #     possible_last_day = first_day + pd.Timedelta(days=1)
-    #     if first_day + pd.Timedelta(days=1) in obs_counts.index:
-    #         possible_last_day = first_day + pd.Timedelta(days=1)
-    #     else:
-    #         last_day = first_day
-    #     possible_last_day = first_day + pd.Timedelta(days=1)
-    #     while possible_last_day in obs_counts.index:
-    #         possible_last_day += pd.Timedelta(days=1)
-    #     # loop until the possible_last_day is not in the index
-    #     last_day = possible_last_day
-    #     total_days = (last_day - first_day + pd.Timedelta(days=1)) / pd.Timedelta(days=1)
-    #     first_day_dt = dt.date(first_day.year, first_day.month,
-    #                            first_day.day)
-    #     last_day_dt = dt.date(last_day.year, last_day.month,
-    #                           last_day.day)
-    #     streaks.append((id, first_day_dt, last_day_dt, total_days))
-    #     else:
-    #         first_day += pd.Timedelta(days=1)
-    #
-    # prev_day = day0
-    # next_day = day0 + pd.Timedelta(days=1)
-    # while next_day in obs_counts.index:
-    #     prev_day = next_day
-    #     next_day = day0 + pd.Timedelta(days=1)
-    # last_day = prev_day
-    #
-    # if
-    # if next_day in obs_counts.index:
-    #     #day1 = day0 + pd.Timedelta(days=1)
-    #
-    #     # streak_days += 1
-    #     # obs_index = obs_counts.index[-2]  # debug only
-    #     while day1 <= obs_counts.index[-2]:  # Next to last (last may be partial)
-    #         if day1 in obs_counts.index:
-    #             streak_days += 1
-    #             day1 += pd.Timedelta(days=1)
-    #         else:
-    #
-    #             if streak_days >= 3:  # Ignore first and last day (may be partial)
-    #                 first_day = day0 + pd.Timedelta(days=1)
-    #                 last_day = day1 - pd.Timedelta(days=1)
-    #                 total_days = (last_day - first_day + pd.Timedelta(days=1)) / pd.Timedelta(days=1)
-    #                 first_day_dt = dt.date(first_day.year, first_day.month,
-    #                                        first_day.day)
-    #                 last_day_dt = dt.date(last_day.year, last_day.month,
-    #                                       last_day.day)
-    #                 streaks.append((id, first_day_dt, last_day_dt, total_days))
-    #             streak_days = 0
-    #             day0 = day1 + pd.Timedelta(days=1)
-    #             day1 = day0 + pd.Timedelta(days=1)
-    # streaks_arr = np.array(streaks)
-    # streaks_arr[streaks_arr[:, 1].argsort()]
-    # streaks_df = pd.DataFrame(data=streaks_arr,
-    #                           columns=['ID', 'First day', 'Last day',
-    #                                    'Consecutive days'])
-    # return streaks_df
-
 
 def daily_cycle_sensor_and_geospatial_obs_counts(id, devices_file, cycles_df,
                                                  sensors_df, geospatial_df=None):
@@ -202,15 +136,13 @@ def daily_cycle_sensor_and_geospatial_obs_counts(id, devices_file, cycles_df, se
                                       for df in dfs)
     else:
         cycles, sensor = (df.set_index(df.index.droplevel())
                           for df in dfs)
-    cycles_time = _get_time_index_column_label(cycles_df)
-    cycles_sensors = pd.merge(cycles, sensor, left_index=cycles_time,
+    cycles_sensors = pd.merge(cycles, sensor, left_index=True,
                               right_index=True, how='inner')
     cycle_end_time = _get_time_label_of_data(cycles_df)
     cycles_sensors.rename(columns={cycle_end_time: 'Cycles_obs'})
     if geospatial_data:
-        geospatial_time_heading = _get_time_index_column_label(geospatial_df)
         return pd.merge(cycles_sensors, geospatial, left_index=True,
-                        right_index=geospatial_time_heading)
+                        right_index=True)
     else:
         return cycles_sensors
diff --git a/tests/test_caar.py b/tests/test_caar.py
index cec85c0..9168234 100755
--- a/tests/test_caar.py
+++ b/tests/test_caar.py
@@ -18,7 +18,7 @@
     GEOSPATIAL_PICKLE_FILE_OUT, GEOSPATIAL_PICKLE_FILE, LOCATION_IDS, \
     TEST_SENSORS_FILE, STATE, TEST_POSTAL_FILE, CYCLE_TYPE_COOL, \
     TEST_SENSOR_OBS_FILE, TEST_GEOSPATIAL_OBS_FILE, ALL_STATES_CYCLES_PICKLED_OUT, \
-    ALL_STATES_INSIDE_PICKLED_OUT, ALL_STATES_OUTSIDE_PICKLED_OUT, SENSOR_ID1, \
+    ALL_STATES_SENSOR_OBS_PICKLED_OUT, ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT, SENSOR_ID1, \
     LOCATION_ID1
 
 standard_library.install_aliases()
@@ -78,7 +78,7 @@ def postal_fixture():
 def state_fixture():
     return [STATE]
 
-@slow
+
 @pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto",
                          [(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE,
                            TEST_POSTAL_FILE, CYCLE_TYPE_COOL, 'cycles'),
@@ -100,7 +100,7 @@ def test_select_clean_auto(data_file, states, sensors, postal, cycle, auto):
     assert len(clean_dict) > 0
 
 
-@slow
+
 @pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto",
                          [(TEST_CYCLES_FILE, None, None, None,
                            CYCLE_TYPE_COOL, 'cycles'),
@@ -116,7 +116,7 @@ def test_col_meta_auto(data_file, states, sensors, postal, cycle, auto):
     assert len(col_meta) > 0
 
 
-@slow
+
 @pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto",
                          [(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE,
                            TEST_POSTAL_FILE, CYCLE_TYPE_COOL, None),
@@ -136,7 +136,7 @@ def test_select_clean(data_file, states, sensors, postal, cycle, auto):
     assert len(clean_dict) > 0
 
 
-@slow
+
 @pytest.mark.parametrize("tempdir, data_file, cycle, states_to_clean, "
                          "expected_path, sensors, postal, auto, encoding",
                          [(tmpdir(), TEST_CYCLES_FILE, CYCLE_TYPE_COOL, STATE, CYCLES_PICKLE_FILE_OUT,
@@ -145,11 +145,11 @@ def test_select_clean(data_file, states, sensors, postal, cycle, auto):
                            None, None, 'cycles', 'UTF-8'),
                           (tmpdir(), TEST_SENSOR_OBS_FILE, None, STATE, SENSOR_PICKLE_FILE_OUT,
                            TEST_SENSORS_FILE, TEST_POSTAL_FILE, 'sensors', 'UTF-8'),
-                          (tmpdir(), TEST_SENSOR_OBS_FILE, None, None, ALL_STATES_INSIDE_PICKLED_OUT,
+                          (tmpdir(), TEST_SENSOR_OBS_FILE, None, None, ALL_STATES_SENSOR_OBS_PICKLED_OUT,
                            None, None, 'sensors', 'UTF-8'),
                           (tmpdir(), TEST_GEOSPATIAL_OBS_FILE, None, STATE, GEOSPATIAL_PICKLE_FILE_OUT,
                            TEST_SENSORS_FILE, TEST_POSTAL_FILE, 'geospatial', 'UTF-8'),
-                          (tmpdir(), TEST_GEOSPATIAL_OBS_FILE, None, None, ALL_STATES_OUTSIDE_PICKLED_OUT,
+                          (tmpdir(), TEST_GEOSPATIAL_OBS_FILE, None, None, ALL_STATES_GEOSPATIAL_OBS_PICKLED_OUT,
                            None, None, 'geospatial', 'UTF-8')])
 def test_pickle_cycles_inside_outside(tempdir, data_file, cycle, states_to_clean,
                                       expected_path, sensors, postal, auto, encoding):
@@ -161,7 +161,7 @@ def test_pickle_cycles_inside_outside(tempdir, data_file, cycle, states_to_clean
     assert pickle_file == os.path.basename(expected_path)
 
 
-@slow
+
 @pytest.mark.parametrize("pickle_file, df_creation_func, id_type, ids",
                          [(CYCLES_PICKLE_FILE, hi.create_cycles_df, 'device_ids',
                            [SENSOR_ID1]),
@@ -179,7 +179,7 @@ def test_df_creation(pickle_file, df_creation_func, id_type, ids):
     df = df_creation_func(pickle_file, **kwargs)
     assert isinstance(df, pd.DataFrame)
 
-@slow
+
 @pytest.mark.parametrize("data_file, states, sensors, postal, cycle, auto, df_creation_func, id_type, ids",
                          [(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE, TEST_POSTAL_FILE,
                            CYCLE_TYPE_COOL, 'cycles', hi.create_cycles_df,
@@ -210,7 +210,7 @@ def test_df_creation_after_dict(data_file, states, sensors, postal, cycle, auto,
     assert isinstance(df, pd.DataFrame)
 
 
-@slow
+
 @pytest.mark.parametrize("data_file, states, sensors, postal, cycle, df_creation_func, id_type, ids",
                          [(TEST_CYCLES_FILE, STATE, TEST_SENSORS_FILE, TEST_POSTAL_FILE,
                            CYCLE_TYPE_COOL, hi.create_cycles_df,
@@ -239,7 +239,7 @@ def test_df_creation_after_fixed_dict(data_file, states, sensors, postal, cycle,
     df = df_creation_func(clean_dict, **kwargs)
     assert isinstance(df, pd.DataFrame)
 
-@slow
+
 @pytest.mark.parametrize("df_fixture, id, start, end, freq",
                          [(cycle_df_fixture(), SENSOR_ID1, dt.datetime(2012, 6, 18, 21, 0, 0),
                            dt.datetime(2012, 6, 18, 23, 0, 0), '1min30s'),
@@ -255,7 +255,7 @@ def test_on_off_status_by_interval(df_fixture, id, start, end, freq):
     assert len(dt_intervals) > 0
     assert len(on_off) == len(dt_intervals)
 
-@slow
+
 @pytest.mark.parametrize("df_fixture, id, start, end, freq",
                          [(sensor_df_fixture(), SENSOR_ID1, dt.datetime(2011, 8, 4, 21, 0, 0),
                            dt.datetime(2011, 8, 4, 23, 0, 0), '1min30s'),
@@ -279,7 +279,7 @@ def test_temps_by_interval(df_fixture, id, start, end, freq):
     assert len(temps[0]) > 0
 
 
-@slow
+
 @pytest.mark.parametrize("thermo_id, start, end, freq, cycle_df, inside_df, outside_df, thermo_file",
                          [(SENSOR_ID1, dt.datetime(2011, 8, 4, 21, 0, 0),
                            dt.datetime(2011, 8, 4, 23, 59, 0), '1min',
@@ -299,12 +299,13 @@ def test_single_day_cycling_and_temps(thermo_id, start, end, freq, cycle_df,
     assert single_day_arr[1].shape[1] == 3
 
 
-@pytest.mark.parametrize("id, devices_file, cycles_df, sensors_df, geospatial_df",
-                         [(92, TEST_SENSORS_FILE, cycle_df_fixture(), sensor_df_fixture(), geospatial_df_fixture())])
-def test_consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df):
-    # dummy = 1  # just here to lookw at the input variables for debugging
-    # cyc = cycles_df  # just here for debugging
-    obs = hs.consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df)
+@pytest.mark.parametrize("id, devices_file, cycles_df, sensors_df, geospatial_df, include_first_last_days",
+                         [(92, TEST_SENSORS_FILE, cycle_df_fixture(), sensor_df_fixture(), geospatial_df_fixture(),
+                           False)])
+def test_consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df,
+                                           include_first_last_days):
+    obs = hs.consecutive_days_of_observations(id, devices_file, cycles_df, sensors_df, geospatial_df=geospatial_df,
+                                              include_first_and_last_days=include_first_last_days)
     assert isinstance(obs, pd.DataFrame)
     assert len(obs) > 0
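
As an aside, here is a minimal, self-contained sketch (not part of the patch) of the index-aligned join that the corrected `pd.merge(..., left_index=True, right_index=True, how='inner')` calls in `caar/histsummary.py` perform; the toy frames, column names, and timestamps below are invented for illustration only:

```python
import pandas as pd

# Hypothetical per-device frames indexed by timestamp, mimicking the shape of
# `cycles` and `sensor` after the device level of the MultiIndex is dropped.
times = pd.to_datetime(['2012-06-18 21:00', '2012-06-18 21:05', '2012-06-18 21:10'])
cycles = pd.DataFrame({'Cycles_obs': [1, 0, 1]}, index=times)
sensor = pd.DataFrame({'Degrees': [78.0, 77.5, 77.0]}, index=times)

# left_index/right_index are booleans: join on the row indexes themselves and
# keep only timestamps present in both frames (how='inner').
cycles_sensors = pd.merge(cycles, sensor, left_index=True, right_index=True,
                          how='inner')
print(cycles_sensors)
```

`pandas.merge` expects booleans for `left_index`/`right_index`, so the removed variants that passed time-column labels (`left_index=cycles_time`, `right_index=geospatial_time_heading`) did not match the documented signature; joining on the indexes themselves, as sketched above, is what the rewritten calls do.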