Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend trip analysis #3540

Merged
merged 4 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.Object2IntLinkedOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2LongMap;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import it.unimi.dsi.fastutil.objects.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.math3.analysis.interpolation.LoessInterpolator;
Expand All @@ -27,6 +24,7 @@
import org.matsim.core.utils.io.IOUtils;
import picocli.CommandLine;
import tech.tablesaw.api.*;
import tech.tablesaw.columns.strings.AbstractStringColumn;
import tech.tablesaw.io.csv.CsvReadOptions;
import tech.tablesaw.joining.DataFrameJoiner;
import tech.tablesaw.selection.Selection;
Expand All @@ -37,6 +35,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static tech.tablesaw.aggregate.AggregateFunctions.count;
Expand All @@ -46,8 +45,9 @@
requires = {"trips.csv", "persons.csv"},
produces = {
"mode_share.csv", "mode_share_per_dist.csv", "mode_users.csv", "trip_stats.csv",
"mode_share_per_%s.csv", "population_trip_stats.csv", "trip_purposes_by_hour.csv",
"mode_share_distance_distribution.csv", "mode_shift.csv",
"mode_share_per_purpose.csv", "mode_share_per_%s.csv",
"population_trip_stats.csv", "trip_purposes_by_hour.csv",
"mode_share_distance_distribution.csv", "mode_shift.csv", "mode_chains.csv",
"mode_choices.csv", "mode_choice_evaluation.csv", "mode_choice_evaluation_per_mode.csv",
"mode_confusion_matrix.csv", "mode_prediction_error.csv"
}
Expand Down Expand Up @@ -283,10 +283,15 @@ public Integer call() throws Exception {

joined.addColumns(dist_group);

TextColumn purpose = joined.textColumn("end_activity_type");

// Remove suffix durations like _345
purpose.set(Selection.withRange(0, purpose.size()), purpose.replaceAll("_[0-9]{2,}$", ""));

writeModeShare(joined, labels);

if (groups != null) {
groups.analyzeModeShare(joined, labels, modeOrder, (g) -> output.getPath("mode_share_per_%s.csv", g));
groups.writeModeShare(joined, labels, modeOrder, (g) -> output.getPath("mode_share_per_%s.csv", g));
}

if (persons.containsColumn(ATTR_REF_MODES)) {
Expand All @@ -305,17 +310,24 @@ public Integer call() throws Exception {

writePopulationStats(persons, joined);

writeTripStats(joined);

writeTripPurposes(joined);

writeTripDistribution(joined);

writeModeShift(joined);
tryRun(this::writeTripStats, joined);
tryRun(this::writeTripPurposes, joined);
tryRun(this::writeTripDistribution, joined);
tryRun(this::writeModeShift, joined);
tryRun(this::writeModeChains, joined);
tryRun(this::writeModeStatsPerPurpose, joined);

return 0;
}

/**
 * Runs a single analysis step on the given table. An {@link IOException} thrown by the step is
 * logged and not propagated, so the caller can continue with the next step.
 *
 * @param f  analysis step to execute
 * @param df table that is handed to the step
 */
private void tryRun(final ThrowingConsumer<Table> f, final Table df) {
	try {
		f.accept(df);
	} catch (final IOException e) {
		// Only reported, the exception is not rethrown
		log.error("Error while running method", e);
	}
}

private void writeModeShare(Table trips, List<String> labels) {

Table aggr = trips.summarize("trip_id", count).by("dist_group", "main_mode");
Expand Down Expand Up @@ -502,11 +514,6 @@ private void writeTripPurposes(Table trips) {
IntColumn.create("arrival_h", arrival.intStream().toArray())
);

TextColumn purpose = trips.textColumn("end_activity_type");

// Remove suffix durations like _345
purpose.set(Selection.withRange(0, purpose.size()), purpose.replaceAll("_[0-9]{2,}$", ""));

Table tArrival = trips.summarize("trip_id", count).by("end_activity_type", "arrival_h");

tArrival.column(0).setName("purpose");
Expand Down Expand Up @@ -610,6 +617,89 @@ private void writeModeShift(Table trips) throws IOException {
aggr.write().csv(output.getPath("mode_shift.csv").toFile());
}

/**
 * Collects information about all modes used during one day.
 * <p>
 * For every person the main modes of all trips are joined with "-" in the order the trips appear
 * in the table. The number of persons per chain is written to {@code mode_chains.csv} with the
 * columns {@code modes}, {@code count} and {@code share}. Chains with more than six trips, and
 * all chains beyond the 250 most frequent ones, are summed up in a single "other" row.
 *
 * @param trips trip table, read via its "person" and "main_mode" columns
 * @throws IOException if the output file can not be written
 */
private void writeModeChains(Table trips) throws IOException {

	// Longest chain (number of trips) that is still reported individually
	final int maxChainLength = 6;
	// Number of distinct chains written before the rest is aggregated into "other"
	final int maxEntries = 250;

	Map<String, List<String>> modesPerPerson = new LinkedHashMap<>();

	for (Row trip : trips) {
		String id = trip.getString("person");
		String mode = trip.getString("main_mode");
		modesPerPerson.computeIfAbsent(id, s -> new ArrayList<>()).add(mode);
	}

	// Store other values explicitly
	ObjectDoubleMutablePair<String> other = ObjectDoubleMutablePair.of("other", 0);
	Object2DoubleMap<String> chains = new Object2DoubleOpenHashMap<>();
	for (List<String> modes : modesPerPerson.values()) {
		if (modes.size() > maxChainLength) {
			other.right(other.rightDouble() + 1);
			continue;
		}

		// Joining a single mode yields the mode itself, so no special case is needed
		chains.mergeDouble(String.join("-", modes), 1, Double::sum);
	}

	List<ObjectDoubleMutablePair<String>> sorted = chains.object2DoubleEntrySet().stream()
		.map(e -> ObjectDoubleMutablePair.of(e.getKey(), (int) e.getDoubleValue()))
		.sorted(Comparator.comparingDouble(p -> -p.rightDouble()))
		.collect(Collectors.toList());

	// Aggregate entries to prevent file from getting too large
	for (int i = maxEntries; i < sorted.size(); i++) {
		other.right(other.rightDouble() + sorted.get(i).rightDouble());
	}

	// Copy the head of the list instead of appending to a subList view of it
	List<ObjectDoubleMutablePair<String>> counts = new ArrayList<>(sorted.subList(0, Math.min(sorted.size(), maxEntries)));
	counts.add(other);

	counts.sort(Comparator.comparingDouble(p -> -p.rightDouble()));

	double total = counts.stream().mapToDouble(ObjectDoubleMutablePair::rightDouble).sum();

	try (CSVPrinter printer = new CSVPrinter(Files.newBufferedWriter(output.getPath("mode_chains.csv")), CSVFormat.DEFAULT)) {

		printer.printRecord("modes", "count", "share");

		for (ObjectDoubleMutablePair<String> p : counts) {
			// Without any trips the total is 0, write a share of 0 instead of NaN
			double share = total > 0 ? p.rightDouble() / total : 0;
			printer.printRecord(p.left(), (int) p.rightDouble(), share);
		}
	}
}

/**
 * Writes the mode share per trip purpose to {@code mode_share_per_purpose.csv}.
 * Trips are counted per "end_activity_type" and "main_mode", then the counts of each purpose
 * are divided by the total of that purpose.
 *
 * @param trips trip table, read via its "trip_id", "end_activity_type" and "main_mode" columns
 */
private void writeModeStatsPerPurpose(Table trips) {

	Table aggr = trips.summarize("trip_id", count).by("end_activity_type", "main_mode");

	Comparator<Row> byPurpose = Comparator.comparing(row -> row.getString("end_activity_type"));
	Comparator<Row> byPurposeAndMode = byPurpose.thenComparing(row -> row.getString("main_mode"));
	aggr = aggr.sortOn(byPurposeAndMode);

	// The last column holds the counts, it is converted into shares below
	int countIdx = aggr.columnCount() - 1;
	aggr.doubleColumn(countIdx).setName("share");
	aggr.column("end_activity_type").setName("purpose");

	@SuppressWarnings("unchecked")
	Set<String> labels = (Set<String>) aggr.column("purpose").asSet();

	DoubleColumn share = aggr.doubleColumn("share");
	// It was not clear if the purpose is a string or text column, therefore this code uses the abstract version
	AbstractStringColumn<?> purpose = (AbstractStringColumn<?>) aggr.column("purpose");

	// Norm each purpose to 1
	for (String label : labels) {
		Selection rows = purpose.isEqualTo(label);

		double sum = share.where(rows).sum();
		if (sum > 0) {
			share.set(rows, share.divide(sum));
		}
	}

	aggr.write().csv(output.getPath("mode_share_per_purpose.csv").toFile());
}

/**
* How shape file filtering should be applied.
*/
Expand All @@ -619,4 +709,9 @@ enum LocationFilter {
home,
none
}

/**
 * Operation that accepts a single argument, returns no result and may throw an {@link IOException}.
 *
 * @param <T> type of the argument
 */
@FunctionalInterface
private interface ThrowingConsumer<T> {
/**
 * Performs the operation on the given argument.
 *
 * @param t the input argument
 * @throws IOException declared so that implementations may fail with an I/O error
 */
void accept(T t) throws IOException;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ final class TripByGroupAnalysis {
}
}

void analyzeModeShare(Table trips, List<String> dists, List<String> modeOrder, Function<String, Path> output) {
void writeModeShare(Table trips, List<String> dists, List<String> modeOrder, Function<String, Path> output) {

for (Group group : groups) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ void trip() {
Assertions.assertThat(out)
.isDirectoryContaining("glob:**trip_stats.csv")
.isDirectoryContaining("glob:**mode_share.csv")
.isDirectoryContaining("glob:**mode_share_per_purpose.csv")
.isDirectoryContaining("glob:**mode_shift.csv");
}

Expand Down
Loading