Explicit chunking on all interaction simulate models (#870)

* explicit chunking on interaction simulate models
* accounting for small and odd num_choosers
* rethinking chunk overflow

---------

Co-authored-by: Jeffrey Newman <[email protected]>
ActivitySim · May 22, 2024 · 29d12bc · 29d12bc
1 parent d8c5829
commit 29d12bc
Show file tree

Hide file tree

Showing 18 changed files with 111 additions and 30 deletions.
diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
@@ -32,8 +32,11 @@ class AccessibilitySettings(PydanticReadable):
     SPEC: str = "accessibility.csv"
     """Filename for the accessibility specification (csv) file."""
 
-    explicit_chunk: int = 0
-    """If > 0, use this chunk size instead of adaptive chunking."""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
 
 @nb.njit

diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py
@@ -177,6 +177,12 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
     """
     List of preprocessor settings to apply to the proto-population tables after generation.
     """
+    explicit_chunk: float | None = None
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    If not supplied or None, will default to the chunk size in the location choice model settings.
+    """
 
 
 def read_disaggregate_accessibility_yaml(
@@ -758,6 +764,11 @@ def get_disaggregate_logsums(
         model_settings = util.suffix_tables_in_settings(model_settings)
         model_settings.CHOOSER_ID_COLUMN = "proto_person_id"
 
+        # Can set explicit chunking for disaggregate accessibility
+        # Otherwise the explicit_chunk will be set to whatever is in the location model settings
+        if disagg_model_settings.explicit_chunk is not None:
+            model_settings.explicit_chunk = disagg_model_settings.explicit_chunk
+
         # Include the suffix tags to pass onto downstream logsum models (e.g., tour mode choice)
         if model_settings.LOGSUM_SETTINGS:
             suffixes = util.concat_suffix_dict(disagg_model_settings.suffixes)

diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
@@ -19,8 +19,6 @@
 from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
 from activitysim.core.util import reindex
 
-# import multiprocessing
-
 
 """
 The school/workplace location model predicts the zones in which various people will
@@ -192,6 +190,7 @@ def _location_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer=zone_layer,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),
@@ -699,6 +698,7 @@ def run_location_simulate(
         trace_choice_name=model_settings.DEST_CHOICE_COLUMN_NAME,
         estimator=estimator,
         skip_choice=skip_choice,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "simulate"
         ),
@@ -1185,11 +1185,6 @@ def workplace_location(
             state, estimator, model_settings, "workplace_location.yaml"
         )
 
-    # FIXME - debugging code to test multiprocessing failure handling
-    # process_name = multiprocessing.current_process().name
-    # if multiprocessing.current_process().name =='mp_households_0':
-    #     raise RuntimeError(f"fake fail {process_name}")
-
     # disable locutor for benchmarking
     if state.settings.benchmarking:
         locutor = False

diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py
@@ -181,8 +181,11 @@ class NonMandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"):
     annotate_tours: PreprocessorSettings | None = None
     """Preprocessor settings to annotate tours"""
 
-    explicit_chunk: int = 0
-    """Number of rows to process in each chunk when explicit chunking is enabled"""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
 
 @workflow.step

diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py
@@ -147,6 +147,7 @@ def parking_destination_simulate(
         chunk_size=chunk_size,
         trace_label=trace_label,
         trace_choice_name="parking_loc",
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     # drop any failed zero_prob destinations
@@ -355,6 +356,12 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"):
     """List of auto modes that use parking. AUTO_MODES are used in write_trip_matrices to make sure
     parking locations are accurately represented in the output trip matrices."""
 
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
+
 
 @workflow.step
 def parking_location(

diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py
@@ -357,8 +357,11 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"):
     no_escorting_alterative: int = 1
     """The alternative number for no escorting. Used to set the choice for households with no escortees."""
 
-    explicit_chunk: int = 0
-    """If > 0, use this chunk size instead of adaptive chunking."""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
     LOGIT_TYPE: Literal["MNL"] = "MNL"
     """Logit model mathematical form.

diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py
@@ -227,6 +227,7 @@ def _destination_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer=zone_layer,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),
@@ -700,6 +701,7 @@ def compute_ood_logsums(
     chunk_size,
     trace_label,
     chunk_tag,
+    explicit_chunk_size=0,
 ):
     """
     Compute one (of two) out-of-direction logsums for destination alternatives
@@ -733,6 +735,7 @@ def compute_ood_logsums(
         chunk_size=chunk_size,
         trace_label=trace_label,
         chunk_tag=chunk_tag,
+        explicit_chunk_size=explicit_chunk_size,
     )
 
     assert logsums.index.equals(choosers.index)
@@ -845,6 +848,7 @@ def compute_logsums(
         state.settings.chunk_size,
         trace_label=tracing.extend_trace_label(trace_label, "od"),
         chunk_tag=chunk_tag,
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     # - dp_logsums
@@ -874,6 +878,7 @@ def compute_logsums(
         state.settings.chunk_size,
         trace_label=tracing.extend_trace_label(trace_label, "dp"),
         chunk_tag=chunk_tag,
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     return destination_sample
@@ -973,6 +978,7 @@ def trip_destination_simulate(
         trace_label=trace_label,
         trace_choice_name="trip_dest",
         estimator=estimator,
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     if not want_logsums:

diff --git a/activitysim/abm/models/util/logsums.py b/activitysim/abm/models/util/logsums.py
@@ -261,6 +261,7 @@ def compute_location_choice_logsums(
         chunk_size=chunk_size,
         chunk_tag=chunk_tag,
         trace_label=trace_label,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=logsum_settings.compute_settings,
     )
 

diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py
@@ -123,6 +123,7 @@ def _destination_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer=zone_layer,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),

diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py
@@ -216,6 +216,7 @@ def _od_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer="taz",
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),
@@ -1057,6 +1058,7 @@ def run_od_simulate(
         trace_label=trace_label,
         trace_choice_name="origin_destination",
         estimator=estimator,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings,
     )
 

diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py
@@ -59,6 +59,12 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"):
     this unsegmented SPEC should be omitted.
     """
 
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
+
 
 def skims_for_logsums(
     state: workflow.State,
@@ -929,7 +935,11 @@ def schedule_tours(
         chunk_trace_label,
         chunk_sizer,
     ) in chunk.adaptive_chunked_choosers(
-        state, tours, tour_trace_label, tour_chunk_tag
+        state,
+        tours,
+        tour_trace_label,
+        tour_chunk_tag,
+        explicit_chunk_size=model_settings.explicit_chunk,
     ):
         choices = _schedule_tours(
             state,

diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py
@@ -585,8 +585,11 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"):
 
     FLEET_YEAR: int
 
-    explicit_chunk: int = 0
-    """If > 0, use this chunk size instead of adaptive chunking."""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
 
 @workflow.step

diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py
@@ -1213,7 +1213,7 @@ def adaptive_chunked_choosers(
     chunk_tag: str = None,
     *,
     chunk_size: int | None = None,
-    explicit_chunk_size: int = 0,
+    explicit_chunk_size: float = 0,
 ):
     # generator to iterate over choosers
 
@@ -1232,12 +1232,16 @@ def adaptive_chunked_choosers(
 
     chunk_tag = chunk_tag or trace_label
 
+    num_choosers = len(choosers.index)
+
     if state.settings.chunk_training_mode == MODE_EXPLICIT:
-        chunk_size = explicit_chunk_size
+        if explicit_chunk_size < 1:
+            chunk_size = math.ceil(num_choosers * explicit_chunk_size)
+        else:
+            chunk_size = int(explicit_chunk_size)
     elif chunk_size is None:
         chunk_size = state.settings.chunk_size
 
-    num_choosers = len(choosers.index)
     assert num_choosers > 0
     assert chunk_size >= 0
 
@@ -1369,9 +1373,13 @@ def adaptive_chunked_choosers_and_alts(
     )
 
     if state.settings.chunk_training_mode == MODE_EXPLICIT:
-        chunk_size = explicit_chunk_size
+        if explicit_chunk_size < 1:
+            chunk_size = math.ceil(num_choosers * explicit_chunk_size)
+        else:
+            chunk_size = int(explicit_chunk_size)
     elif chunk_size is None:
         chunk_size = state.settings.chunk_size
+
     chunk_sizer = ChunkSizer(
         state,
         chunk_tag,
@@ -1397,16 +1405,16 @@ def adaptive_chunked_choosers_and_alts(
     while offset < num_choosers:
         i += 1
 
-        assert (
-            offset + rows_per_chunk <= num_choosers
-        ), f"i {i} offset {offset} rows_per_chunk {rows_per_chunk} num_choosers {num_choosers}"
-
         chunk_trace_label = trace_label_for_chunk(state, trace_label, chunk_size, i)
 
         with chunk_sizer.ledger():
             chooser_chunk = choosers[offset : offset + rows_per_chunk]
 
-            alt_end = alt_chunk_ends[offset + rows_per_chunk]
+            # protecting from overflow in the case of last chunk
+            if (offset + rows_per_chunk) >= len(alt_chunk_ends):
+                alt_end = alt_chunk_ends[len(alt_chunk_ends) - 1]
+            else:
+                alt_end = alt_chunk_ends[offset + rows_per_chunk]
             alternative_chunk = alternatives[alt_offset:alt_end]
 
             if check_assertions:

diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py
@@ -190,6 +190,12 @@ class LocationComponentSettings(BaseLogitComponentSettings):
     LOGSUM_SETTINGS: Path
     """Settings for the logsum computation."""
 
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
+
 
 class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"):
     # Logsum-related settings

diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py
@@ -561,6 +561,7 @@ def interaction_sample(
     chunk_tag: str | None = None,
     trace_label: str | None = None,
     zone_layer: str | None = None,
+    explicit_chunk_size: float = 0,
     compute_settings: ComputeSettings | None = None,
 ):
     """
@@ -607,6 +608,9 @@ def interaction_sample(
         Specify which zone layer of the skims is to be used.  You cannot use the
         'maz' zone layer in a one-zone model, but you can use the 'taz' layer in
         a two- or three-zone model (e.g. for destination pre-sampling).
+    explicit_chunk_size : float, optional
+        If > 0, specifies the chunk size to use when chunking the interaction
+        simulation. If < 1, specifies the fraction of the total number of choosers.
 
     Returns
     -------
@@ -642,7 +646,9 @@ def interaction_sample(
         chooser_chunk,
         chunk_trace_label,
         chunk_sizer,
-    ) in chunk.adaptive_chunked_choosers(state, choosers, trace_label, chunk_tag):
+    ) in chunk.adaptive_chunked_choosers(
+        state, choosers, trace_label, chunk_tag, explicit_chunk_size=explicit_chunk_size
+    ):
         choices = _interaction_sample(
             state,
             chooser_chunk,

diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py
@@ -412,6 +412,7 @@ def interaction_sample_simulate(
     trace_choice_name=None,
     estimator=None,
     skip_choice=False,
+    explicit_chunk_size=0,
     *,
     compute_settings: ComputeSettings | None = None,
 ):
@@ -455,6 +456,9 @@ def interaction_sample_simulate(
     skip_choice: bool
         This skips the logit choice step and simply returns the alternatives table with logsums
         (used in disaggregate accessibility)
+    explicit_chunk_size : float, optional
+        If > 0, specifies the chunk size to use when chunking the interaction
+        simulation. If < 1, specifies the fraction of the total number of choosers.
 
     Returns
     -------
@@ -484,7 +488,13 @@ def interaction_sample_simulate(
         chunk_trace_label,
         chunk_sizer,
     ) in chunk.adaptive_chunked_choosers_and_alts(
-        state, choosers, alternatives, trace_label, chunk_tag, chunk_size=chunk_size
+        state,
+        choosers,
+        alternatives,
+        trace_label,
+        chunk_tag,
+        chunk_size=chunk_size,
+        explicit_chunk_size=explicit_chunk_size,
     ):
         choices = _interaction_sample_simulate(
             state,

diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py
@@ -967,9 +967,9 @@ def interaction_simulate(
         when household tracing enabled. No tracing occurs if label is empty or None.
     trace_choice_name: str
         This is the column label to be used in trace file csv dump of choices
-    explicit_chunk_size : int, optional
+    explicit_chunk_size : float, optional
         If > 0, specifies the chunk size to use when chunking the interaction
-        simulation.
+        simulation. If < 1, specifies the fraction of the total number of choosers.
 
     Returns
     -------