From 29d12bcc054d9206b01318b352d1c33f12c7dca0 Mon Sep 17 00:00:00 2001 From: David Hensle <51132108+dhensle@users.noreply.github.com> Date: Wed, 22 May 2024 14:10:17 -0700 Subject: [PATCH] Explicit chunking on all interaction simulate models (#870) * explicit chunking on interaction simulate models * accounting for small and odd num_choosers * rethinking chunk overflow --------- Co-authored-by: Jeffrey Newman --- activitysim/abm/models/accessibility.py | 7 +++-- .../abm/models/disaggregate_accessibility.py | 11 ++++++++ activitysim/abm/models/location_choice.py | 9 ++----- .../models/non_mandatory_tour_frequency.py | 7 +++-- .../abm/models/parking_location_choice.py | 7 +++++ activitysim/abm/models/school_escorting.py | 7 +++-- activitysim/abm/models/trip_destination.py | 6 +++++ activitysim/abm/models/util/logsums.py | 1 + .../abm/models/util/tour_destination.py | 1 + activitysim/abm/models/util/tour_od.py | 2 ++ .../models/util/vectorize_tour_scheduling.py | 12 ++++++++- activitysim/abm/models/vehicle_type_choice.py | 7 +++-- activitysim/core/chunk.py | 26 ++++++++++++------- activitysim/core/configuration/logit.py | 6 +++++ activitysim/core/interaction_sample.py | 8 +++++- .../core/interaction_sample_simulate.py | 12 ++++++++- activitysim/core/interaction_simulate.py | 4 +-- activitysim/core/simulate.py | 8 +++++- 18 files changed, 111 insertions(+), 30 deletions(-) diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index f82a34f67..4c87eddb7 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -32,8 +32,11 @@ class AccessibilitySettings(PydanticReadable): SPEC: str = "accessibility.csv" """Filename for the accessibility specification (csv) file.""" - explicit_chunk: int = 0 - """If > 0, use this chunk size instead of adaptive chunking.""" + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. 
+ If less than 1, use this fraction of the total number of rows. + """ @nb.njit diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 8d1102743..40265b798 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -177,6 +177,12 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"): """ List of preprocessor settings to apply to the proto-population tables after generation. """ + explicit_chunk: float | None = None + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. + If not supplied or None, will default to the chunk size in the location choice model settings. + """ def read_disaggregate_accessibility_yaml( @@ -758,6 +764,11 @@ def get_disaggregate_logsums( model_settings = util.suffix_tables_in_settings(model_settings) model_settings.CHOOSER_ID_COLUMN = "proto_person_id" + # Can set explicit chunking for disaggregate accessibility + # Otherwise the explicit_chunk will be set to whatever is in the location model settings + if disagg_model_settings.explicit_chunk is not None: + model_settings.explicit_chunk = disagg_model_settings.explicit_chunk + # Include the suffix tags to pass onto downstream logsum models (e.g., tour mode choice) if model_settings.LOGSUM_SETTINGS: suffixes = util.concat_suffix_dict(disagg_model_settings.suffixes) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index cb4de93b0..83e794b2b 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -19,8 +19,6 @@ from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex -# import multiprocessing - """ The school/workplace location model predicts the zones in which various people will @@ -192,6 
+190,7 @@ def _location_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "sample" ), @@ -699,6 +698,7 @@ def run_location_simulate( trace_choice_name=model_settings.DEST_CHOICE_COLUMN_NAME, estimator=estimator, skip_choice=skip_choice, + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "simulate" ), @@ -1185,11 +1185,6 @@ def workplace_location( state, estimator, model_settings, "workplace_location.yaml" ) - # FIXME - debugging code to test multiprocessing failure handling - # process_name = multiprocessing.current_process().name - # if multiprocessing.current_process().name =='mp_households_0': - # raise RuntimeError(f"fake fail {process_name}") - # disable locutor for benchmarking if state.settings.benchmarking: locutor = False diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 5ec2c9407..e0fb4817e 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -181,8 +181,11 @@ class NonMandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"): annotate_tours: PreprocessorSettings | None = None """Preprocessor settings to annotate tours""" - explicit_chunk: int = 0 - """Number of rows to process in each chunk when explicit chunking is enabled""" + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. 
+ """ @workflow.step diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index c6e289764..674b950aa 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -147,6 +147,7 @@ def parking_destination_simulate( chunk_size=chunk_size, trace_label=trace_label, trace_choice_name="parking_loc", + explicit_chunk_size=model_settings.explicit_chunk, ) # drop any failed zero_prob destinations @@ -355,6 +356,12 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): """List of auto modes that use parking. AUTO_MODES are used in write_trip_matrices to make sure parking locations are accurately represented in the output trip matrices.""" + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. + """ + @workflow.step def parking_location( diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 7605d492d..22fea9e52 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -357,8 +357,11 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): no_escorting_alterative: int = 1 """The alternative number for no escorting. Used to set the choice for households with no escortees.""" - explicit_chunk: int = 0 - """If > 0, use this chunk size instead of adaptive chunking.""" + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. + """ LOGIT_TYPE: Literal["MNL"] = "MNL" """Logit model mathematical form. 
diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index e214d56fd..2b6b5a2ff 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -227,6 +227,7 @@ def _destination_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "sample" ), @@ -700,6 +701,7 @@ def compute_ood_logsums( chunk_size, trace_label, chunk_tag, + explicit_chunk_size=0, ): """ Compute one (of two) out-of-direction logsums for destination alternatives @@ -733,6 +735,7 @@ def compute_ood_logsums( chunk_size=chunk_size, trace_label=trace_label, chunk_tag=chunk_tag, + explicit_chunk_size=explicit_chunk_size, ) assert logsums.index.equals(choosers.index) @@ -845,6 +848,7 @@ def compute_logsums( state.settings.chunk_size, trace_label=tracing.extend_trace_label(trace_label, "od"), chunk_tag=chunk_tag, + explicit_chunk_size=model_settings.explicit_chunk, ) # - dp_logsums @@ -874,6 +878,7 @@ def compute_logsums( state.settings.chunk_size, trace_label=tracing.extend_trace_label(trace_label, "dp"), chunk_tag=chunk_tag, + explicit_chunk_size=model_settings.explicit_chunk, ) return destination_sample @@ -973,6 +978,7 @@ def trip_destination_simulate( trace_label=trace_label, trace_choice_name="trip_dest", estimator=estimator, + explicit_chunk_size=model_settings.explicit_chunk, ) if not want_logsums: diff --git a/activitysim/abm/models/util/logsums.py b/activitysim/abm/models/util/logsums.py index 82eef1c60..a13b34344 100644 --- a/activitysim/abm/models/util/logsums.py +++ b/activitysim/abm/models/util/logsums.py @@ -261,6 +261,7 @@ def compute_location_choice_logsums( chunk_size=chunk_size, chunk_tag=chunk_tag, trace_label=trace_label, + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=logsum_settings.compute_settings, ) diff --git 
a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index 751301d6a..0891b8d21 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -123,6 +123,7 @@ def _destination_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "sample" ), diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index 06a9364ff..7c615142f 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -216,6 +216,7 @@ def _od_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer="taz", + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "sample" ), @@ -1057,6 +1058,7 @@ def run_od_simulate( trace_label=trace_label, trace_choice_name="origin_destination", estimator=estimator, + explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings, ) diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index 6bdc907bd..dfab8171d 100644 --- a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -59,6 +59,12 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"): this unsegmented SPEC should be omitted. """ + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. 
+ """ + def skims_for_logsums( state: workflow.State, @@ -929,7 +935,11 @@ def schedule_tours( chunk_trace_label, chunk_sizer, ) in chunk.adaptive_chunked_choosers( - state, tours, tour_trace_label, tour_chunk_tag + state, + tours, + tour_trace_label, + tour_chunk_tag, + explicit_chunk_size=model_settings.explicit_chunk, ): choices = _schedule_tours( state, diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index cfe6fc398..93caae038 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -585,8 +585,11 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"): FLEET_YEAR: int - explicit_chunk: int = 0 - """If > 0, use this chunk size instead of adaptive chunking.""" + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. + """ @workflow.step diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py index 6ad77a7be..7f09187f1 100644 --- a/activitysim/core/chunk.py +++ b/activitysim/core/chunk.py @@ -1213,7 +1213,7 @@ def adaptive_chunked_choosers( chunk_tag: str = None, *, chunk_size: int | None = None, - explicit_chunk_size: int = 0, + explicit_chunk_size: float = 0, ): # generator to iterate over choosers @@ -1232,12 +1232,16 @@ def adaptive_chunked_choosers( chunk_tag = chunk_tag or trace_label + num_choosers = len(choosers.index) + if state.settings.chunk_training_mode == MODE_EXPLICIT: - chunk_size = explicit_chunk_size + if explicit_chunk_size < 1: + chunk_size = math.ceil(num_choosers * explicit_chunk_size) + else: + chunk_size = int(explicit_chunk_size) elif chunk_size is None: chunk_size = state.settings.chunk_size - num_choosers = len(choosers.index) assert num_choosers > 0 assert chunk_size >= 0 @@ -1369,9 +1373,13 @@ def adaptive_chunked_choosers_and_alts( ) if state.settings.chunk_training_mode == MODE_EXPLICIT: - 
chunk_size = explicit_chunk_size + if explicit_chunk_size < 1: + chunk_size = math.ceil(num_choosers * explicit_chunk_size) + else: + chunk_size = int(explicit_chunk_size) elif chunk_size is None: chunk_size = state.settings.chunk_size + chunk_sizer = ChunkSizer( state, chunk_tag, @@ -1397,16 +1405,16 @@ def adaptive_chunked_choosers_and_alts( while offset < num_choosers: i += 1 - assert ( - offset + rows_per_chunk <= num_choosers - ), f"i {i} offset {offset} rows_per_chunk {rows_per_chunk} num_choosers {num_choosers}" - chunk_trace_label = trace_label_for_chunk(state, trace_label, chunk_size, i) with chunk_sizer.ledger(): chooser_chunk = choosers[offset : offset + rows_per_chunk] - alt_end = alt_chunk_ends[offset + rows_per_chunk] + # protecting from overflow in the case of last chunk + if (offset + rows_per_chunk) >= len(alt_chunk_ends): + alt_end = alt_chunk_ends[len(alt_chunk_ends) - 1] + else: + alt_end = alt_chunk_ends[offset + rows_per_chunk] alternative_chunk = alternatives[alt_offset:alt_end] if check_assertions: diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index 9c93d9f97..d03bcab77 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -190,6 +190,12 @@ class LocationComponentSettings(BaseLogitComponentSettings): LOGSUM_SETTINGS: Path """Settings for the logsum computation.""" + explicit_chunk: float = 0 + """ + If > 0, use this chunk size instead of adaptive chunking. + If less than 1, use this fraction of the total number of rows. 
+ """ + class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): # Logsum-related settings diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 04bef4e9e..0afa6c3c4 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -561,6 +561,7 @@ def interaction_sample( chunk_tag: str | None = None, trace_label: str | None = None, zone_layer: str | None = None, + explicit_chunk_size: float = 0, compute_settings: ComputeSettings | None = None, ): """ @@ -607,6 +608,9 @@ def interaction_sample( Specify which zone layer of the skims is to be used. You cannot use the 'maz' zone layer in a one-zone model, but you can use the 'taz' layer in a two- or three-zone model (e.g. for destination pre-sampling). + explicit_chunk_size : float, optional + If > 0, specifies the chunk size to use when chunking the interaction + simulation. If < 1, specifies the fraction of the total number of choosers. Returns ------- @@ -642,7 +646,9 @@ def interaction_sample( chooser_chunk, chunk_trace_label, chunk_sizer, - ) in chunk.adaptive_chunked_choosers(state, choosers, trace_label, chunk_tag): + ) in chunk.adaptive_chunked_choosers( + state, choosers, trace_label, chunk_tag, explicit_chunk_size=explicit_chunk_size + ): choices = _interaction_sample( state, chooser_chunk, diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py index 063da8167..2df1fe4ce 100644 --- a/activitysim/core/interaction_sample_simulate.py +++ b/activitysim/core/interaction_sample_simulate.py @@ -412,6 +412,7 @@ def interaction_sample_simulate( trace_choice_name=None, estimator=None, skip_choice=False, + explicit_chunk_size=0, *, compute_settings: ComputeSettings | None = None, ): @@ -455,6 +456,9 @@ def interaction_sample_simulate( skip_choice: bool This skips the logit choice step and simply returns the alternatives table with logsums (used in disaggregate 
accessibility) + explicit_chunk_size : float, optional + If > 0, specifies the chunk size to use when chunking the interaction + simulation. If < 1, specifies the fraction of the total number of choosers. Returns ------- @@ -484,7 +488,13 @@ def interaction_sample_simulate( chunk_trace_label, chunk_sizer, ) in chunk.adaptive_chunked_choosers_and_alts( - state, choosers, alternatives, trace_label, chunk_tag, chunk_size=chunk_size + state, + choosers, + alternatives, + trace_label, + chunk_tag, + chunk_size=chunk_size, + explicit_chunk_size=explicit_chunk_size, ): choices = _interaction_sample_simulate( state, diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 433e5e2c1..0a863b2e7 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -967,9 +967,9 @@ def interaction_simulate( when household tracing enabled. No tracing occurs if label is empty or None. trace_choice_name: str This is the column label to be used in trace file csv dump of choices - explicit_chunk_size : int, optional + explicit_chunk_size : float, optional If > 0, specifies the chunk size to use when chunking the interaction - simulation. + simulation. If < 1, specifies the fraction of the total number of choosers. 
Returns ------- diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index 35710d6ce..0dcb5a379 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -2028,6 +2028,7 @@ def simple_simulate_logsums( chunk_size=0, trace_label=None, chunk_tag=None, + explicit_chunk_size=0, compute_settings: ComputeSettings | None = None, ): """ @@ -2050,7 +2051,12 @@ def simple_simulate_logsums( chunk_trace_label, chunk_sizer, ) in chunk.adaptive_chunked_choosers( - state, choosers, trace_label, chunk_tag, chunk_size=chunk_size + state, + choosers, + trace_label, + chunk_tag, + chunk_size=chunk_size, + explicit_chunk_size=explicit_chunk_size, ): logsums = _simple_simulate_logsums( state,