Skip to content

Commit

Permalink
adjusted distances
Browse files Browse the repository at this point in the history
  • Loading branch information
Hussein-Mahfouz committed Dec 13, 2024
1 parent 1cec3f7 commit 84a9273
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 14 deletions.
7 changes: 7 additions & 0 deletions config/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ optional_columns = [
]
n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household?

[feasible_assignment]
# detour factor when converting euclidian distance to actual travel distance
detour_factor = 1.56
# decay rate when converting euclidian to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "OA"
use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
Expand Down
10 changes: 9 additions & 1 deletion scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def main(config_file):
logger.info("Creating estimated travel times matrix")
# Create a new travel time matrix based on distances between zones
travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, time_units="m"
zones=boundaries,
id_col=config.zone_id,
time_units="m",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)
logger.info("Travel time estimates created")

Expand Down Expand Up @@ -223,6 +227,8 @@ def main(config_file):
filter_by_activity=True,
activity_col="education_type",
time_tolerance=0.3,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for education activities")
Expand Down Expand Up @@ -250,6 +256,8 @@ def main(config_file):
filter_by_activity=True,
activity_col="dact",
time_tolerance=0.3,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for work activities")
Expand Down
12 changes: 9 additions & 3 deletions scripts/3.3_assign_facility_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,10 @@ def main(config_file):
x_col="TripDisIncSW",
y_col="length",
x_label="Reported Travel Distance (km)",
y_label="Actual Distance - Euclidian (km)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=50,
crs=f"EPSG:{config.output_crs}",
title_prefix=f"Scatter plot of TripDisIncSW vs. Length for {activity_type}",
save_dir=acbm.root_path / "data/processed/plots/assigning/",
Expand All @@ -344,8 +347,11 @@ def main(config_file):
activity_type_col="destination activity",
x_col="TripTotalTime",
y_col="length",
x_label="Reported Travel TIme (min)",
y_label="Actual Distance - Euclidian (km)",
x_label="Reported Travel Time (min)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=180,
crs=f"EPSG:{config.output_crs}",
title_prefix="Scatter plot of TripTotalTime vs. Length",
save_dir=acbm.root_path / "data/processed/plots/assigning/",
Expand Down
2 changes: 2 additions & 0 deletions scripts/4_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ def main(config_file):
start_wkt_col="start_location_geometry_wkt",
end_wkt_col="end_location_geometry_wkt",
crs_epsg=config.output_crs,
detour_factor=1.56,
decay_rate=0.0001,
)

# Plot: Aggregate
Expand Down
10 changes: 5 additions & 5 deletions scripts/run_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ set -e
# python scripts/0_preprocess_inputs.py --config_file $1
# python scripts/0.1_run_osmox.py --config_file $1
# python scripts/1_prep_synthpop.py --config_file $1
python scripts/2_match_households_and_individuals.py --config_file $1
python scripts/3.1_assign_primary_feasible_zones.py --config_file $1
python scripts/3.2.1_assign_primary_zone_edu.py --config_file $1
python scripts/3.2.2_assign_primary_zone_work.py --config_file $1
python scripts/3.2.3_assign_secondary_zone.py --config_file $1
# python scripts/2_match_households_and_individuals.py --config_file $1
# python scripts/3.1_assign_primary_feasible_zones.py --config_file $1
# python scripts/3.2.1_assign_primary_zone_edu.py --config_file $1
# python scripts/3.2.2_assign_primary_zone_work.py --config_file $1
# python scripts/3.2.3_assign_secondary_zone.py --config_file $1
python scripts/3.3_assign_facility_all.py --config_file $1
python scripts/4_validation.py --config_file $1
python scripts/5_acbm_to_matsim_xml.py --config_file $1
17 changes: 15 additions & 2 deletions src/acbm/assigning/feasible_zones_primary.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def get_possible_zones(
travel_times: Optional[pd.DataFrame] = None,
filter_by_activity: bool = False,
time_tolerance: float = 0.2,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> dict:
"""
Get possible zones for all activity chains in the dataset. This function loops over the travel_times dataframe and filters by mode, time of day and weekday/weekend.
Expand Down Expand Up @@ -104,6 +106,13 @@ def get_possible_zones(
activity chain's travel time (which is stored in "TripTotalTime"). Allowable travel_times are those that fall in the range of:
travel_time_reported * (1 - time_tolerance) <= travel_time_reported <= travel_time_reported * (1 + time_tolerance)
Default = 0.2
detour_factor: float
The detour factor used to calculate travel distances between zones from euclidian distances.
Default = 1.56
decay_rate: float
The decay rate used to calculate travel times between zones from travel distances. Detours make up a smaller portion of
longer distance trips. Default = 0.0001
Returns
-------
Expand Down Expand Up @@ -135,7 +144,11 @@ def get_possible_zones(
if travel_times is None:
logger.info("Travel time matrix not provided: Creating travel times estimates")
travel_times = zones_to_time_matrix(
zones=boundaries, id_col=zone_id, time_units="m"
zones=boundaries,
id_col=zone_id,
time_units="m",
detour_factor=detour_factor,
decay_rate=decay_rate,
)

list_of_modes = activity_chains["mode"].unique()
Expand Down Expand Up @@ -224,7 +237,7 @@ def get_possible_zones(
travel_times_filtered_mode_time_day = travel_times_filtered_mode
# if mode = car_passenger, we compare to the travel time for car (we don't
# have travel times for car_passenger)
if mode == "car_passenger":
if mode in ("car_passenger", "taxi"):
activity_chains_filtered = activity_chains[
(activity_chains["mode"] == "car")
]
Expand Down
19 changes: 19 additions & 0 deletions src/acbm/assigning/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from matplotlib import patches as mpatches
from shapely.geometry import LineString

from acbm.assigning.utils import _adjust_distance


def plot_workzone_assignment_line(
assignment_results: pd.DataFrame,
Expand Down Expand Up @@ -387,7 +389,10 @@ def plot_scatter_actual_reported(
title_prefix: str,
activity_type: str,
activity_type_col: str,
detour_factor: float,
decay_rate: float,
crs: str,
x_axis_max: float,
save_dir: str | Path | None = None,
display: bool = False,
):
Expand All @@ -404,7 +409,10 @@ def plot_scatter_actual_reported(
- title_prefix: Prefix for the plot titles.
- activity_type: Type of activity to plot.
- activity_type_col: Column name for the activity type.
- detour_factor: Detour factor for the distance adjustment. (see `_adjust_distance`)
- decay_rate: Decay rate for the distance adjustment.
- crs: Coordinate reference system (CRS) of the activities data.
- x_axis_max: Maximum value for the x-axis. (use to remove very high reported distances / times)
- save_dir: Directory to save the plots. If None, plots are not saved.
- display: Whether to display plots by calling `plt.show()`.
"""
Expand Down Expand Up @@ -433,6 +441,13 @@ def plot_scatter_actual_reported(
# calculate the length of the line_geometry in meters
activity_chains_plot["length"] = activity_chains_plot["line_geometry"].length

# Create a column for estimated distance
activity_chains_plot["length"] = activity_chains_plot["length"].apply(
lambda d: _adjust_distance(
d, detour_factor=detour_factor, decay_rate=decay_rate
)
)

# Calculate the number of rows and columns for the subplots. It is a function of the number of modes
nrows = math.ceil(len(activity_chains_plot["mode"].unique()) / 2)
ncols = 2
Expand All @@ -459,6 +474,10 @@ def plot_scatter_actual_reported(
p = np.poly1d(z)
ax.plot(subset_mode[x_col], p(subset_mode[x_col]), "r--")

# Set x-axis limit
if x_axis_max:
ax.set_xlim(0, x_axis_max)

ax.set_title(f"{title_prefix} for mode: {mode}")
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
Expand Down
36 changes: 36 additions & 0 deletions src/acbm/assigning/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,39 @@ def _get_activities_per_zone_df(activities_per_zone: dict) -> pd.DataFrame:
return pd.concat(activities_per_zone.values())


def _adjust_distance(
distance: float,
detour_factor: float,
decay_rate: float,
) -> float:
"""
Adjusts euclidian distances by adding a detour factor. We use minkowski distance
and a decay rate, as longer detour makes up a smaller proportion of the total
distance as the distance increases.
Parameters
----------
distance : float
The original distance.
detour_factor : float
The detour factor to be applied.
decay_rate : float
The decay rate to be applied.
Returns
-------
float
The adjusted distance.
"""
return distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))


def zones_to_time_matrix(
zones: gpd.GeoDataFrame,
time_units: str,
id_col: Optional[str] = None,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> pd.DataFrame:
"""
Calculates the distance matrix between the centroids of the given zones and returns it as a DataFrame. The matrix also adds
Expand All @@ -305,6 +334,8 @@ def zones_to_time_matrix(
The name of the column in the zones GeoDataFrame to use as the ID. If None, the index values are used. Default is None.
time_units: str, optional
The units to use for the travel time. Options are 's' for seconds and 'm' for minutes.
detour_factor: float, optional
The detour factor to apply to the distance. Default is 1.56.
Returns
-------
pd.DataFrame
Expand All @@ -321,6 +352,11 @@ def zones_to_time_matrix(
distances = distances.stack().reset_index()
distances.columns = [f"{id_col}_from", f"{id_col}_to", "distance"]

# adjust distance column by adding a detour factor
distances["distance"] = distances["distance"].apply(
lambda d: _adjust_distance(d, detour_factor, decay_rate)
)

# replace the index values with the id_col values if id_col is specified
if id_col is not None:
# create a mapping from index to id_col values
Expand Down
11 changes: 10 additions & 1 deletion src/acbm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ class MatchingParams(BaseModel):
chunk_size: int = 50_000


@dataclass(frozen=True)
class FeasibleAssignmentParams(BaseModel):
detour_factor: float
decay_rate: float


@dataclass(frozen=True)
class WorkAssignmentParams(BaseModel):
use_percentages: bool
Expand All @@ -51,10 +57,13 @@ class Postprocessing(BaseModel):

class Config(BaseModel):
parameters: Parameters = Field(description="Config: parameters.")
matching: MatchingParams = Field(description="Config: parameters for matching.")
feasible_assignment: FeasibleAssignmentParams = Field(
description="Config: parameters for assignment of feasible zones."
)
work_assignment: WorkAssignmentParams = Field(
description="Config: parameters for work assignment."
)
matching: MatchingParams = Field(description="Config: parameters for matching.")
postprocessing: Postprocessing = Field(
description="Config: parameters for postprocessing."
)
Expand Down
23 changes: 21 additions & 2 deletions src/acbm/validating/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
from shapely import wkt

from acbm.assigning.utils import _adjust_distance


def process_sequences(
df: pd.DataFrame,
Expand Down Expand Up @@ -76,6 +78,8 @@ def calculate_od_distances(
end_wkt_col: str,
crs_epsg: int,
projected_epsg: int = 3857,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> pd.DataFrame:
"""
Calculate distances between start and end geometries in a DataFrame.
Expand All @@ -94,6 +98,11 @@ def calculate_od_distances(
projected_epsg: int
EPSG code for the projected CRS (default is 3857). We need a metric crs
to calculte distances in meters.
detour_factor: float
Factor to adjust the estimated distance.
decay_rate: float
Decay rate for the distance adjustment. Detours are a smaller proportion of
the direct distance for longer trips.
Returns
-------
Expand All @@ -120,7 +129,17 @@ def calculate_od_distances(
gdf = gdf.to_crs(epsg=projected_epsg)
end_gdf = end_gdf.to_crs(epsg=projected_epsg)

# Calculate the distance between start and end geometries (in km)
gdf["distance"] = round(gdf.geometry.distance(end_gdf.geometry) / 1000, 1)
# Calculate the distance between start and end geometries (in m)
gdf["distance"] = gdf.geometry.distance(end_gdf.geometry)

# Estimate actual travel distance
gdf["distance"] = gdf["distance"].apply(
lambda d: _adjust_distance(
d, detour_factor=detour_factor, decay_rate=decay_rate
)
)

# convert distance to km
gdf["distance"] = round(gdf["distance"] / 1000, 1)

return gdf

0 comments on commit 84a9273

Please sign in to comment.