Skip to content

Commit

Permalink
[Bugfix] Allow seed and meta geography to be the same (#139)
Browse files Browse the repository at this point in the history
* Fixes a bug where, if the seed geography is the same as the meta geography, selecting the same crosswalk column twice makes pandas raise an error while building the seed-to-meta mapping, and the run fails.

* Add cytoolz to the packages installed by the Travis CI build (`.travis.yml`).

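* Fix another breakage caused by an ActivitySim change: update the `mp_tasks.run_multiprocess` call in `populationsim/tests/run_mp.py`.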
  • Loading branch information
jamiecook authored Aug 19, 2021
1 parent 158fde1 commit fd303be
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ install:
- conda info -a
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
- conda activate test-environment
- conda install pytest pytest-cov coveralls pycodestyle
- conda install pytest pytest-cov coveralls pycodestyle cytoolz
- pip install .
- pip freeze

Expand Down
10 changes: 5 additions & 5 deletions populationsim/steps/setup_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,11 @@ def add_geography_columns(incidence_table, households_df, crosswalk_df):
# add seed_geography col to incidence table
incidence_table[seed_geography] = households_df[seed_geography]

# add meta column to incidence table
seed_to_meta = \
crosswalk_df[[seed_geography, meta_geography]] \
.groupby(seed_geography, as_index=True).min()[meta_geography]
incidence_table[meta_geography] = incidence_table[seed_geography].map(seed_to_meta)
# add meta column to incidence table (unless it's already there)
if seed_geography != meta_geography:
tmp = crosswalk_df[list({seed_geography, meta_geography})]
seed_to_meta = tmp.groupby(seed_geography, as_index=True).min()[meta_geography]
incidence_table[meta_geography] = incidence_table[seed_geography].map(seed_to_meta)

return incidence_table

Expand Down
38 changes: 20 additions & 18 deletions populationsim/tests/run_mp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,56 +17,58 @@

def setup_dirs():

configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
configs_dir = os.path.join(os.path.dirname(__file__), "configs")
mp_configs_dir = os.path.join(os.path.dirname(__file__), "configs_mp")
inject.add_injectable("configs_dir", [mp_configs_dir, configs_dir])

output_dir = os.path.join(os.path.dirname(__file__), 'output')
output_dir = os.path.join(os.path.dirname(__file__), "output")
inject.add_injectable("output_dir", output_dir)

data_dir = os.path.join(os.path.dirname(__file__), 'data')
data_dir = os.path.join(os.path.dirname(__file__), "data")
inject.add_injectable("data_dir", data_dir)

tracing.config_logger()

tracing.delete_output_files('csv')
tracing.delete_output_files('txt')
tracing.delete_output_files('yaml')
tracing.delete_output_files("csv")
tracing.delete_output_files("txt")
tracing.delete_output_files("yaml")


def regress():

expanded_household_ids = pipeline.get_table('expanded_household_ids')
expanded_household_ids = pipeline.get_table("expanded_household_ids")
assert isinstance(expanded_household_ids, pd.DataFrame)
taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
taz_hh_counts = expanded_household_ids.groupby("TAZ").size()
assert len(taz_hh_counts) == TAZ_COUNT
assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

# output_tables action: skip
output_dir = inject.get_injectable('output_dir')
assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))
output_dir = inject.get_injectable("output_dir")
assert not os.path.exists(os.path.join(output_dir, "households.csv"))
assert os.path.exists(os.path.join(output_dir, "summary_DISTRICT_1.csv"))


def test_mp_run():

setup_dirs()

# Debugging ----------------------
run_list = mp_tasks.get_run_list()
mp_tasks.print_run_list(run_list)
# --------------------------------

# do this after config.handle_standard_args, as command line args may override injectables
injectables = ['data_dir', 'configs_dir', 'output_dir']
# do this after config.handle_standard_args, as command line args
# may override injectables
injectables = ["data_dir", "configs_dir", "output_dir"]
injectables = {k: inject.get_injectable(k) for k in injectables}

# pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
mp_tasks.run_multiprocess(injectables)

mp_tasks.run_multiprocess(run_list, injectables)
pipeline.open_pipeline('_')
pipeline.open_pipeline("_")
regress()
pipeline.close_pipeline()


if __name__ == '__main__':
if __name__ == "__main__":

test_mp_run()

0 comments on commit fd303be

Please sign in to comment.