Merge pull request #591 from camsys/est-fix

Estimation fix
ActivitySim · Dec 30, 2022 · 7cc99ef · 7cc99ef
2 parents 58cf20e + 88e1bd5
commit 7cc99ef
Show file tree

Hide file tree

Showing 4 changed files with 1,257 additions and 1,062 deletions.
diff --git a/activitysim/estimation/larch/general.py b/activitysim/estimation/larch/general.py
@@ -1,18 +1,15 @@
-import itertools
 import logging
 import os
-import re
 from pathlib import Path
 from typing import Mapping
 
 import numpy as np
 import pandas as pd
-import yaml
-from larch import DataFrames, Model, P, X
+from larch import DataFrames, Model, P, X  # noqa: F401
 from larch.log import logger_name
 from larch.model.abstract_model import AbstractChoiceModel
 from larch.model.tree import NestingTree
-from larch.util import Dict
+from larch.util import Dict  # noqa: F401
 
 _logger = logging.getLogger(logger_name)
 
@@ -490,13 +487,24 @@ def clean_values(
     return values
 
 
-def update_coefficients(model, data, result_dir=Path("."), output_file=None):
+def update_coefficients(
+    model, data, result_dir=Path("."), output_file=None, relabel_coef=None
+):
     if isinstance(data, pd.DataFrame):
         coefficients = data.copy()
     else:
         coefficients = data.coefficients.copy()
-    est_names = [j for j in coefficients.index if j in model.pf.index]
-    coefficients.loc[est_names, "value"] = model.pf.loc[est_names, "value"]
+    if relabel_coef is not None and len(relabel_coef):
+        for j in coefficients.index:
+            if j in model.pf.index:
+                coefficients.loc[j, "value"] = model.pf.loc[j, "value"]
+            else:
+                j_ = relabel_coef.get(j, None)
+                if j_ is not None and j_ in model.pf.index:
+                    coefficients.loc[j, "value"] = model.pf.loc[j_, "value"]
+    else:
+        est_names = [j for j in coefficients.index if j in model.pf.index]
+        coefficients.loc[est_names, "value"] = model.pf.loc[est_names, "value"]
     if output_file is not None:
         os.makedirs(result_dir, exist_ok=True)
         coefficients.reset_index().to_csv(

diff --git a/activitysim/estimation/larch/nonmand_tour_freq.py b/activitysim/estimation/larch/nonmand_tour_freq.py
@@ -1,14 +1,10 @@
-import itertools
 import logging
 import os
-import re
 from pathlib import Path
-from typing import Mapping
 
-import numpy as np
 import pandas as pd
 import yaml
-from larch import DataFrames, Model, P, X
+from larch import DataFrames, Model
 from larch.log import logger_name
 from larch.util import Dict
 
@@ -125,17 +121,34 @@ def unavail(model, x_ca):
 def nonmand_tour_freq_model(
     edb_directory="output/estimation_data_bundle/{name}/",
     return_data=False,
+    condense_parameters=False,
 ):
+    """
+    Prepare nonmandatory tour frequency models for estimation.
+
+    Parameters
+    ----------
+    edb_directory : str
+        Location of estimation data bundle for these models.
+    return_data : bool, default False
+        Whether to also return the data used in preparing these models.
+        If True, the data is returned as a dict in the second return value.
+    condense_parameters : bool, default False
+        Apply a transformation whereby all parameters in each model that
+        have the same initial value are converted to have the same name
+        (and thus to be the same parameter, used in various places).
+    """
     data = interaction_simulate_data(
         name="non_mandatory_tour_frequency",
         edb_directory=edb_directory,
     )
 
     settings = data.settings
     segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]]
-    data.relabel_coef = link_same_value_coefficients(
-        segment_names, data.coefficients, data.spec
-    )
+    if condense_parameters:
+        data.relabel_coef = link_same_value_coefficients(
+            segment_names, data.coefficients, data.spec
+        )
     spec = data.spec
     coefficients = data.coefficients
     chooser_data = data.chooser_data

diff --git a/activitysim/estimation/test/test_larch_estimation.py b/activitysim/estimation/test/test_larch_estimation.py
@@ -279,12 +279,24 @@ def test_tour_and_subtour_mode_choice(est_data, num_regression, dataframe_regres
 def test_nonmand_tour_freq(est_data, num_regression, dataframe_regression):
     from activitysim.estimation.larch.nonmand_tour_freq import nonmand_tour_freq_model
 
-    m = nonmand_tour_freq_model()
+    m = nonmand_tour_freq_model(condense_parameters=True)
     loglike_prior = {}
+    expected_n_params = {
+        "PTYPE_FULL": 72,
+        "PTYPE_PART": 51,
+        "PTYPE_UNIVERSITY": 70,
+        "PTYPE_NONWORK": 77,
+        "PTYPE_RETIRED": 53,
+        "PTYPE_DRIVING": 43,
+        "PTYPE_SCHOOL": 34,
+        "PTYPE_PRESCHOOL": 25,
+    }
     for segment_name in m:
         m[segment_name].load_data()
         m[segment_name].doctor(repair_ch_av="-")
         loglike_prior[segment_name] = m[segment_name].loglike()
+        assert len(m[segment_name].pf) == expected_n_params[segment_name]
+        assert len(m[segment_name].utility_ca) == 210
     r = {}
     for segment_name in m:
         r[segment_name] = m[segment_name].maximize_loglike(
@@ -297,3 +309,14 @@ def test_nonmand_tour_freq(est_data, num_regression, dataframe_regression):
         basename="test_nonmand_tour_freq_loglike",
     )
     _regression_check(dataframe_regression, pd.concat([x.pf for x in m.values()]))
+
+
+def test_nonmand_tour_freq_not_condensed(
+    est_data, num_regression, dataframe_regression
+):
+    from activitysim.estimation.larch.nonmand_tour_freq import nonmand_tour_freq_model
+
+    m = nonmand_tour_freq_model(condense_parameters=False)
+    for segment_name in m:
+        assert len(m[segment_name].pf) == 210
+        assert len(m[segment_name].utility_ca) == 210