[Bugfix] Allow seed and meta geography to be the same (#139)

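* Fixes a bug where, if the seed geography is the same as the meta geography, the same crosswalk column is selected twice, the pandas groupby on it fails, and the run aborts. * Add cytoolz to the "requirements" (the Travis CI conda install). * Fix another ActivitySim change (mp_tasks.run_multiprocess no longer takes run_list).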
ActivitySim · Aug 19, 2021 · fd303be · fd303be
1 parent 158fde1
commit fd303be
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 24 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -16,7 +16,7 @@ install:
 - conda info -a
 - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
 - conda activate test-environment
-- conda install pytest pytest-cov coveralls pycodestyle
+- conda install pytest pytest-cov coveralls pycodestyle cytoolz
 - pip install .
 - pip freeze
 

diff --git a/populationsim/steps/setup_data_structures.py b/populationsim/steps/setup_data_structures.py
@@ -111,11 +111,11 @@ def add_geography_columns(incidence_table, households_df, crosswalk_df):
     # add seed_geography col to incidence table
     incidence_table[seed_geography] = households_df[seed_geography]
 
-    # add meta column to incidence table
-    seed_to_meta = \
-        crosswalk_df[[seed_geography, meta_geography]] \
-        .groupby(seed_geography, as_index=True).min()[meta_geography]
-    incidence_table[meta_geography] = incidence_table[seed_geography].map(seed_to_meta)
+    # add meta column to incidence table (unless it's already there)
+    if seed_geography != meta_geography:
+        tmp = crosswalk_df[list({seed_geography, meta_geography})]
+        seed_to_meta = tmp.groupby(seed_geography, as_index=True).min()[meta_geography]
+        incidence_table[meta_geography] = incidence_table[seed_geography].map(seed_to_meta)
 
     return incidence_table
 

diff --git a/populationsim/tests/run_mp.py b/populationsim/tests/run_mp.py
@@ -17,56 +17,58 @@
 
 def setup_dirs():
 
-    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
-    mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
+    configs_dir = os.path.join(os.path.dirname(__file__), "configs")
+    mp_configs_dir = os.path.join(os.path.dirname(__file__), "configs_mp")
     inject.add_injectable("configs_dir", [mp_configs_dir, configs_dir])
 
-    output_dir = os.path.join(os.path.dirname(__file__), 'output')
+    output_dir = os.path.join(os.path.dirname(__file__), "output")
     inject.add_injectable("output_dir", output_dir)
 
-    data_dir = os.path.join(os.path.dirname(__file__), 'data')
+    data_dir = os.path.join(os.path.dirname(__file__), "data")
     inject.add_injectable("data_dir", data_dir)
 
     tracing.config_logger()
 
-    tracing.delete_output_files('csv')
-    tracing.delete_output_files('txt')
-    tracing.delete_output_files('yaml')
+    tracing.delete_output_files("csv")
+    tracing.delete_output_files("txt")
+    tracing.delete_output_files("yaml")
 
 
 def regress():
 
-    expanded_household_ids = pipeline.get_table('expanded_household_ids')
+    expanded_household_ids = pipeline.get_table("expanded_household_ids")
     assert isinstance(expanded_household_ids, pd.DataFrame)
-    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
+    taz_hh_counts = expanded_household_ids.groupby("TAZ").size()
     assert len(taz_hh_counts) == TAZ_COUNT
     assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT
 
     # output_tables action: skip
-    output_dir = inject.get_injectable('output_dir')
-    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
-    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))
+    output_dir = inject.get_injectable("output_dir")
+    assert not os.path.exists(os.path.join(output_dir, "households.csv"))
+    assert os.path.exists(os.path.join(output_dir, "summary_DISTRICT_1.csv"))
 
 
 def test_mp_run():
 
     setup_dirs()
 
+    # Debugging ----------------------
     run_list = mp_tasks.get_run_list()
     mp_tasks.print_run_list(run_list)
+    # --------------------------------
 
-    # do this after config.handle_standard_args, as command line args may override injectables
-    injectables = ['data_dir', 'configs_dir', 'output_dir']
+    # do this after config.handle_standard_args, as command line args
+    # may override injectables
+    injectables = ["data_dir", "configs_dir", "output_dir"]
     injectables = {k: inject.get_injectable(k) for k in injectables}
 
-    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
+    mp_tasks.run_multiprocess(injectables)
 
-    mp_tasks.run_multiprocess(run_list, injectables)
-    pipeline.open_pipeline('_')
+    pipeline.open_pipeline("_")
     regress()
     pipeline.close_pipeline()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     test_mp_run()