Skip to content

Commit

Permalink
Explicit chunking on all interaction simulate models (#870)
Browse files Browse the repository at this point in the history
* explicit chunking on interaction simulate models

* accounting for small and odd num_choosers

* rethinking chunk overflow

---------

Co-authored-by: Jeffrey Newman <[email protected]>
  • Loading branch information
dhensle and jpn-- authored May 22, 2024
1 parent d8c5829 commit 29d12bc
Show file tree
Hide file tree
Showing 18 changed files with 111 additions and 30 deletions.
7 changes: 5 additions & 2 deletions activitysim/abm/models/accessibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,11 @@ class AccessibilitySettings(PydanticReadable):
SPEC: str = "accessibility.csv"
"""Filename for the accessibility specification (csv) file."""

explicit_chunk: int = 0
"""If > 0, use this chunk size instead of adaptive chunking."""
explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""


@nb.njit
Expand Down
11 changes: 11 additions & 0 deletions activitysim/abm/models/disaggregate_accessibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
"""
List of preprocessor settings to apply to the proto-population tables after generation.
"""
explicit_chunk: float | None = None
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
If not supplied or None, will default to the chunk size in the location choice model settings.
"""


def read_disaggregate_accessibility_yaml(
Expand Down Expand Up @@ -758,6 +764,11 @@ def get_disaggregate_logsums(
model_settings = util.suffix_tables_in_settings(model_settings)
model_settings.CHOOSER_ID_COLUMN = "proto_person_id"

# Can set explicit chunking for disaggregate accessibility
# Otherwise explicit_chunk will be set to whatever is in the location model settings
if disagg_model_settings.explicit_chunk is not None:
model_settings.explicit_chunk = disagg_model_settings.explicit_chunk

# Include the suffix tags to pass onto downstream logsum models (e.g., tour mode choice)
if model_settings.LOGSUM_SETTINGS:
suffixes = util.concat_suffix_dict(disagg_model_settings.suffixes)
Expand Down
9 changes: 2 additions & 7 deletions activitysim/abm/models/location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
from activitysim.core.util import reindex

# import multiprocessing


"""
The school/workplace location model predicts the zones in which various people will
Expand Down Expand Up @@ -192,6 +190,7 @@ def _location_sample(
chunk_tag=chunk_tag,
trace_label=trace_label,
zone_layer=zone_layer,
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=model_settings.compute_settings.subcomponent_settings(
"sample"
),
Expand Down Expand Up @@ -699,6 +698,7 @@ def run_location_simulate(
trace_choice_name=model_settings.DEST_CHOICE_COLUMN_NAME,
estimator=estimator,
skip_choice=skip_choice,
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=model_settings.compute_settings.subcomponent_settings(
"simulate"
),
Expand Down Expand Up @@ -1185,11 +1185,6 @@ def workplace_location(
state, estimator, model_settings, "workplace_location.yaml"
)

# FIXME - debugging code to test multiprocessing failure handling
# process_name = multiprocessing.current_process().name
# if multiprocessing.current_process().name =='mp_households_0':
# raise RuntimeError(f"fake fail {process_name}")

# disable locutor for benchmarking
if state.settings.benchmarking:
locutor = False
Expand Down
7 changes: 5 additions & 2 deletions activitysim/abm/models/non_mandatory_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,11 @@ class NonMandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"):
annotate_tours: PreprocessorSettings | None = None
"""Preprocessor settings to annotate tours"""

explicit_chunk: int = 0
"""Number of rows to process in each chunk when explicit chunking is enabled"""
explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""


@workflow.step
Expand Down
7 changes: 7 additions & 0 deletions activitysim/abm/models/parking_location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def parking_destination_simulate(
chunk_size=chunk_size,
trace_label=trace_label,
trace_choice_name="parking_loc",
explicit_chunk_size=model_settings.explicit_chunk,
)

# drop any failed zero_prob destinations
Expand Down Expand Up @@ -355,6 +356,12 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"):
"""List of auto modes that use parking. AUTO_MODES are used in write_trip_matrices to make sure
parking locations are accurately represented in the output trip matrices."""

explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""


@workflow.step
def parking_location(
Expand Down
7 changes: 5 additions & 2 deletions activitysim/abm/models/school_escorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,11 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"):
no_escorting_alterative: int = 1
"""The alternative number for no escorting. Used to set the choice for households with no escortees."""

explicit_chunk: int = 0
"""If > 0, use this chunk size instead of adaptive chunking."""
explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""

LOGIT_TYPE: Literal["MNL"] = "MNL"
"""Logit model mathematical form.
Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/models/trip_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ def _destination_sample(
chunk_tag=chunk_tag,
trace_label=trace_label,
zone_layer=zone_layer,
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=model_settings.compute_settings.subcomponent_settings(
"sample"
),
Expand Down Expand Up @@ -700,6 +701,7 @@ def compute_ood_logsums(
chunk_size,
trace_label,
chunk_tag,
explicit_chunk_size=0,
):
"""
Compute one (of two) out-of-direction logsums for destination alternatives
Expand Down Expand Up @@ -733,6 +735,7 @@ def compute_ood_logsums(
chunk_size=chunk_size,
trace_label=trace_label,
chunk_tag=chunk_tag,
explicit_chunk_size=explicit_chunk_size,
)

assert logsums.index.equals(choosers.index)
Expand Down Expand Up @@ -845,6 +848,7 @@ def compute_logsums(
state.settings.chunk_size,
trace_label=tracing.extend_trace_label(trace_label, "od"),
chunk_tag=chunk_tag,
explicit_chunk_size=model_settings.explicit_chunk,
)

# - dp_logsums
Expand Down Expand Up @@ -874,6 +878,7 @@ def compute_logsums(
state.settings.chunk_size,
trace_label=tracing.extend_trace_label(trace_label, "dp"),
chunk_tag=chunk_tag,
explicit_chunk_size=model_settings.explicit_chunk,
)

return destination_sample
Expand Down Expand Up @@ -973,6 +978,7 @@ def trip_destination_simulate(
trace_label=trace_label,
trace_choice_name="trip_dest",
estimator=estimator,
explicit_chunk_size=model_settings.explicit_chunk,
)

if not want_logsums:
Expand Down
1 change: 1 addition & 0 deletions activitysim/abm/models/util/logsums.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def compute_location_choice_logsums(
chunk_size=chunk_size,
chunk_tag=chunk_tag,
trace_label=trace_label,
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=logsum_settings.compute_settings,
)

Expand Down
1 change: 1 addition & 0 deletions activitysim/abm/models/util/tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def _destination_sample(
chunk_tag=chunk_tag,
trace_label=trace_label,
zone_layer=zone_layer,
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=model_settings.compute_settings.subcomponent_settings(
"sample"
),
Expand Down
2 changes: 2 additions & 0 deletions activitysim/abm/models/util/tour_od.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def _od_sample(
chunk_tag=chunk_tag,
trace_label=trace_label,
zone_layer="taz",
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=model_settings.compute_settings.subcomponent_settings(
"sample"
),
Expand Down Expand Up @@ -1057,6 +1058,7 @@ def run_od_simulate(
trace_label=trace_label,
trace_choice_name="origin_destination",
estimator=estimator,
explicit_chunk_size=model_settings.explicit_chunk,
compute_settings=model_settings.compute_settings,
)

Expand Down
12 changes: 11 additions & 1 deletion activitysim/abm/models/util/vectorize_tour_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"):
this unsegmented SPEC should be omitted.
"""

explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""


def skims_for_logsums(
state: workflow.State,
Expand Down Expand Up @@ -929,7 +935,11 @@ def schedule_tours(
chunk_trace_label,
chunk_sizer,
) in chunk.adaptive_chunked_choosers(
state, tours, tour_trace_label, tour_chunk_tag
state,
tours,
tour_trace_label,
tour_chunk_tag,
explicit_chunk_size=model_settings.explicit_chunk,
):
choices = _schedule_tours(
state,
Expand Down
7 changes: 5 additions & 2 deletions activitysim/abm/models/vehicle_type_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,8 +585,11 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"):

FLEET_YEAR: int

explicit_chunk: int = 0
"""If > 0, use this chunk size instead of adaptive chunking."""
explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""


@workflow.step
Expand Down
26 changes: 17 additions & 9 deletions activitysim/core/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ def adaptive_chunked_choosers(
chunk_tag: str = None,
*,
chunk_size: int | None = None,
explicit_chunk_size: int = 0,
explicit_chunk_size: float = 0,
):
# generator to iterate over choosers

Expand All @@ -1232,12 +1232,16 @@ def adaptive_chunked_choosers(

chunk_tag = chunk_tag or trace_label

num_choosers = len(choosers.index)

if state.settings.chunk_training_mode == MODE_EXPLICIT:
chunk_size = explicit_chunk_size
if explicit_chunk_size < 1:
chunk_size = math.ceil(num_choosers * explicit_chunk_size)
else:
chunk_size = int(explicit_chunk_size)
elif chunk_size is None:
chunk_size = state.settings.chunk_size

num_choosers = len(choosers.index)
assert num_choosers > 0
assert chunk_size >= 0

Expand Down Expand Up @@ -1369,9 +1373,13 @@ def adaptive_chunked_choosers_and_alts(
)

if state.settings.chunk_training_mode == MODE_EXPLICIT:
chunk_size = explicit_chunk_size
if explicit_chunk_size < 1:
chunk_size = math.ceil(num_choosers * explicit_chunk_size)
else:
chunk_size = int(explicit_chunk_size)
elif chunk_size is None:
chunk_size = state.settings.chunk_size

chunk_sizer = ChunkSizer(
state,
chunk_tag,
Expand All @@ -1397,16 +1405,16 @@ def adaptive_chunked_choosers_and_alts(
while offset < num_choosers:
i += 1

assert (
offset + rows_per_chunk <= num_choosers
), f"i {i} offset {offset} rows_per_chunk {rows_per_chunk} num_choosers {num_choosers}"

chunk_trace_label = trace_label_for_chunk(state, trace_label, chunk_size, i)

with chunk_sizer.ledger():
chooser_chunk = choosers[offset : offset + rows_per_chunk]

alt_end = alt_chunk_ends[offset + rows_per_chunk]
# protecting from overflow in the case of last chunk
if (offset + rows_per_chunk) >= len(alt_chunk_ends):
alt_end = alt_chunk_ends[len(alt_chunk_ends) - 1]
else:
alt_end = alt_chunk_ends[offset + rows_per_chunk]
alternative_chunk = alternatives[alt_offset:alt_end]

if check_assertions:
Expand Down
6 changes: 6 additions & 0 deletions activitysim/core/configuration/logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ class LocationComponentSettings(BaseLogitComponentSettings):
LOGSUM_SETTINGS: Path
"""Settings for the logsum computation."""

explicit_chunk: float = 0
"""
If > 0, use this chunk size instead of adaptive chunking.
If less than 1, use this fraction of the total number of rows.
"""


class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"):
# Logsum-related settings
Expand Down
8 changes: 7 additions & 1 deletion activitysim/core/interaction_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ def interaction_sample(
chunk_tag: str | None = None,
trace_label: str | None = None,
zone_layer: str | None = None,
explicit_chunk_size: float = 0,
compute_settings: ComputeSettings | None = None,
):
"""
Expand Down Expand Up @@ -607,6 +608,9 @@ def interaction_sample(
Specify which zone layer of the skims is to be used. You cannot use the
'maz' zone layer in a one-zone model, but you can use the 'taz' layer in
a two- or three-zone model (e.g. for destination pre-sampling).
explicit_chunk_size : float, optional
If > 0, specifies the chunk size to use when chunking the interaction
simulation. If < 1, specifies the fraction of the total number of choosers.
Returns
-------
Expand Down Expand Up @@ -642,7 +646,9 @@ def interaction_sample(
chooser_chunk,
chunk_trace_label,
chunk_sizer,
) in chunk.adaptive_chunked_choosers(state, choosers, trace_label, chunk_tag):
) in chunk.adaptive_chunked_choosers(
state, choosers, trace_label, chunk_tag, explicit_chunk_size=explicit_chunk_size
):
choices = _interaction_sample(
state,
chooser_chunk,
Expand Down
12 changes: 11 additions & 1 deletion activitysim/core/interaction_sample_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ def interaction_sample_simulate(
trace_choice_name=None,
estimator=None,
skip_choice=False,
explicit_chunk_size=0,
*,
compute_settings: ComputeSettings | None = None,
):
Expand Down Expand Up @@ -455,6 +456,9 @@ def interaction_sample_simulate(
skip_choice: bool
This skips the logit choice step and simply returns the alternatives table with logsums
(used in disaggregate accessibility)
explicit_chunk_size : float, optional
If > 0, specifies the chunk size to use when chunking the interaction
simulation. If < 1, specifies the fraction of the total number of choosers.
Returns
-------
Expand Down Expand Up @@ -484,7 +488,13 @@ def interaction_sample_simulate(
chunk_trace_label,
chunk_sizer,
) in chunk.adaptive_chunked_choosers_and_alts(
state, choosers, alternatives, trace_label, chunk_tag, chunk_size=chunk_size
state,
choosers,
alternatives,
trace_label,
chunk_tag,
chunk_size=chunk_size,
explicit_chunk_size=explicit_chunk_size,
):
choices = _interaction_sample_simulate(
state,
Expand Down
4 changes: 2 additions & 2 deletions activitysim/core/interaction_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,9 +967,9 @@ def interaction_simulate(
when household tracing enabled. No tracing occurs if label is empty or None.
trace_choice_name: str
This is the column label to be used in trace file csv dump of choices
explicit_chunk_size : int, optional
explicit_chunk_size : float, optional
If > 0, specifies the chunk size to use when chunking the interaction
simulation.
simulation. If < 1, specifies the fraction of the total number of choosers.
Returns
-------
Expand Down
Loading

0 comments on commit 29d12bc

Please sign in to comment.