Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Repair Shared Stops Validator #584

Merged
merged 8 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/maven.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ jobs:
java-version: 19
distribution: 'temurin'
# Install node 14 for running e2e tests (and for maven-semantic-release).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit update to 20 or remove 14.

- name: Use Node.js 18.x
- name: Use Node.js 20.x
uses: actions/setup-node@v1
with:
node-version: 18.x
node-version: 20.x
- name: Start MongoDB
uses: supercharge/[email protected]
with:
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@
<groupId>com.github.ibi-group</groupId>
<artifactId>gtfs-lib</artifactId>
<!-- Latest dev build on jitpack.io -->
<version>3fa0da9</version>
<version>761456fdc66bc1b72fc860b5e5853b0f2904e009</version>
<!-- Exclusions added in order to silence SLF4J warnings about multiple bindings:
http://www.slf4j.org/codes.html#multiple_bindings
-->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.conveyal.datatools.manager.jobs.validation;

import java.util.Map;

public enum SharedStopsHeader {

STOP_GROUP_ID_INDEX("stop_group_id"),

STOP_ID_INDEX("stop_id"),

IS_PRIMARY_INDEX("is_primary"),

FEED_ID_INDEX("feed_id");

public final String headerName;

SharedStopsHeader(String name) {
this.headerName = name;
}

public String getHeaderName() {
return headerName;
}

public static SharedStopsHeader fromValue(String headerName) {
for (SharedStopsHeader sharedStopsHeader : SharedStopsHeader.values()) {
if (sharedStopsHeader.getHeaderName().equalsIgnoreCase(headerName)) {
return sharedStopsHeader;
}
}
throw new UnsupportedOperationException(String.format("Unknown shared stops header: %s", headerName));
}

public static boolean hasRequiredHeaders(Map<SharedStopsHeader, Integer> headerIndices) {
return headerIndices.size() == SharedStopsHeader.values().length;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Map;

import static com.conveyal.datatools.manager.jobs.validation.SharedStopsHeader.STOP_GROUP_ID_INDEX;
import static com.conveyal.datatools.manager.jobs.validation.SharedStopsHeader.STOP_ID_INDEX;

public class SharedStopsValidator extends FeedValidator {
private static final Logger LOG = LoggerFactory.getLogger(SharedStopsValidator.class);
Expand All @@ -42,13 +44,53 @@ public SharedStopsValidator buildSharedStopsValidator(Feed feed, SQLErrorStorage
}
return new SharedStopsValidator(feed, errorStorage, this.project, this.feedId);
}

public SharedStopsValidator(Feed feed, SQLErrorStorage errorStorage, Project project, String feedId) {
super(feed, errorStorage);
this.feed = feed;
this.project = project;
this.feedId = feedId;
}

public static String[] getHeaders(CsvReader configReader) throws IOException {
configReader.setHeaders(configReader.getRawRecord().split(","));
return configReader.getHeaders();
}

public static Map<SharedStopsHeader, Integer> getHeaderIndices(CsvReader configReader) {
Map<SharedStopsHeader, Integer> headerIndices = new EnumMap<>(SharedStopsHeader.class);

try {
configReader.readRecord();
String[] headers = getHeaders(configReader);

for (int i = 0; i < headers.length; i++) {
String header = headers[i];
switch (SharedStopsHeader.fromValue(header)) {
case STOP_GROUP_ID_INDEX:
headerIndices.put(STOP_GROUP_ID_INDEX, i);
break;
case FEED_ID_INDEX:
headerIndices.put(SharedStopsHeader.FEED_ID_INDEX, i);
break;
case STOP_ID_INDEX:
headerIndices.put(STOP_ID_INDEX, i);
break;
case IS_PRIMARY_INDEX:
headerIndices.put(SharedStopsHeader.IS_PRIMARY_INDEX, i);
break;
}
}

if (!SharedStopsHeader.hasRequiredHeaders(headerIndices)) {
throw new RuntimeException("shared_stops.csv is missing headers!");
}
} catch (IOException e) {
throw new RuntimeException("shared_stops.csv was invalid!", e);
}
return headerIndices;
}

@Override
public void validate() {
String config = project.sharedStopsConfig;
Expand All @@ -58,12 +100,6 @@ public void validate() {

CsvReader configReader = CsvReader.parse(config);

// TODO: pull indicies from the CSV header
final int STOP_GROUP_ID_INDEX = 0;
final int STOP_ID_INDEX = 2;
final int IS_PRIMARY_INDEX = 3;
final int FEED_ID_INDEX = 1;

// Build list of stop Ids.
List<String> stopIds = new ArrayList<>();
List<Stop> stops = new ArrayList<>();
Expand All @@ -73,54 +109,59 @@ public void validate() {
stopIds.add(stop.stop_id);
}

// Initialize hashmaps to hold duplication info
HashMap<String, Set<String>> stopGroupStops = new HashMap<>();
// Initialize hashmaps to hold duplication info.
HashSet<String> seenStopIds = new HashSet<>();
HashSet<String> stopGroupsWithPrimaryStops = new HashSet<>();

try {
Map<SharedStopsHeader, Integer> headerIndices = getHeaderIndices(configReader);
while (configReader.readRecord()) {
String stopGroupId = configReader.get(STOP_GROUP_ID_INDEX);
String stopId = configReader.get(STOP_ID_INDEX);
String sharedStopFeedId = configReader.get(FEED_ID_INDEX);
String stopGroupId = configReader.get(headerIndices.get(STOP_GROUP_ID_INDEX));
String stopId = configReader.get(headerIndices.get(STOP_ID_INDEX));
String sharedStopFeedId = configReader.get(headerIndices.get(SharedStopsHeader.FEED_ID_INDEX));

if (stopId.equals("stop_id")) {
// Swallow header row
if (stopId.equals(STOP_ID_INDEX.headerName)) {
// Swallow header row.
continue;
}

stopGroupStops.putIfAbsent(stopGroupId, new HashSet<>());
Set<String> seenStopIds = stopGroupStops.get(stopGroupId);

// Check for SS_01 (stop id appearing in multiple stop groups)
// Check for SS_01 (stop id appearing in multiple stop groups).
// Make sure this error is only returned if we are inside the feed that is being checked.
if (seenStopIds.contains(stopId)) {
registerError(stops
if (feedId.equals(sharedStopFeedId)) {
registerError(stops
.stream()
.filter(stop -> stop.stop_id.equals(stopId))
.findFirst()
.orElse(stops.get(0)), NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS
);
.orElse(new Stop()), NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS
);
}
} else {
seenStopIds.add(stopId);
}

// Check for SS_02 (multiple primary stops per stop group)
if (stopGroupsWithPrimaryStops.contains(stopGroupId)) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS, stopGroupId));
} else if (configReader.get(IS_PRIMARY_INDEX).equals("true")) {
stopGroupsWithPrimaryStops.add(stopGroupId);
// Check for SS_02 (multiple primary stops per stop group).
if (
configReader.get(headerIndices.get(SharedStopsHeader.IS_PRIMARY_INDEX)).equals("1") ||
configReader.get(headerIndices.get(SharedStopsHeader.IS_PRIMARY_INDEX)).equals("true")
) {
if (stopGroupsWithPrimaryStops.contains(stopGroupId)) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS, stopGroupId));
} else {
stopGroupsWithPrimaryStops.add(stopGroupId);
}
}

// Check for SS_03 (stop_id referenced doesn't exist)
// Make sure this error is only returned if we are inside the feed that is being checked
// Check for SS_03 (stop_id referenced doesn't exist).
// Make sure this error is only returned if we are inside the feed that is being checked.
if (feedId.equals(sharedStopFeedId) && !stopIds.contains(stopId)) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_ENTITY_DOES_NOT_EXIST, stopId));
}
}
} catch (IOException e) { LOG.error(e.toString()); }
finally {
} catch (IOException e) {
LOG.error("Unable to validate shared stops.", e);
} finally {
configReader.close();
}


}
}
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ public boolean hasBlockingIssuesForPublishing() {
NewGTFSErrorType.TABLE_IN_SUBDIRECTORY,
NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS,
NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS,
NewGTFSErrorType.SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS
NewGTFSErrorType.SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS
));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package com.conveyal.datatools.manager.jobs.validation;

import com.conveyal.datatools.UnitTest;
import com.csvreader.CsvReader;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.stream.Stream;

import static com.zenika.snapshotmatcher.SnapshotMatcher.matchesSnapshot;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

class SharedStopsValidatorTest extends UnitTest {

@ParameterizedTest
@MethodSource("createCSVHeaders")
void canHandleVariousCorrectCSV(String headers) {
assertThat(SharedStopsValidator.getHeaderIndices(CsvReader.parse(headers)), matchesSnapshot());
}

private static Stream<String> createCSVHeaders() {
return Stream.of(
"is_primary,feed_id,stop_id,stop_group_id",
"feed_id,stop_id,stop_group_id,is_primary",
"feed_id,is_primary,stop_group_id,stop_id,feed_id"
);
}

@ParameterizedTest
@MethodSource("createInvalidCSVArguments")
void attemptToParseInvalidCSV(InvalidCSVArgument invalidCSVArgument) {
CsvReader configReader = CsvReader.parse(invalidCSVArgument.csv);
Exception exception = assertThrows(RuntimeException.class, () -> SharedStopsValidator.getHeaderIndices(configReader));
assertEquals(invalidCSVArgument.expectedError, exception.getMessage());
}

private static Stream<InvalidCSVArgument> createInvalidCSVArguments() {
String invalid = "Unknown shared stops header: ";
String missing = "shared_stops.csv is missing headers!";
return Stream.of(
new InvalidCSVArgument("this is a great string, but it's not a shared_stops csv!", invalid + "this is a great string"),
new InvalidCSVArgument("", invalid),
new InvalidCSVArgument("is_primary,stop_id", missing),
new InvalidCSVArgument("is_primary,feed_id,,stop_group_id", invalid),
new InvalidCSVArgument("is_primary,feed_id,stop_group_id", missing),
new InvalidCSVArgument("feed_id, is_primary, stop_group_id,stop_id", invalid + " is_primary")
);
}

private static class InvalidCSVArgument {
public String csv;
public String expectedError;

public InvalidCSVArgument(String csv, String expectedError) {
this.csv = csv;
this.expectedError = expectedError;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"STOP_GROUP_ID_INDEX" : 3,
"STOP_ID_INDEX" : 2,
"IS_PRIMARY_INDEX" : 0,
"FEED_ID_INDEX" : 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"STOP_GROUP_ID_INDEX" : 2,
"STOP_ID_INDEX" : 1,
"IS_PRIMARY_INDEX" : 3,
"FEED_ID_INDEX" : 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"STOP_GROUP_ID_INDEX" : 2,
"STOP_ID_INDEX" : 3,
"IS_PRIMARY_INDEX" : 1,
"FEED_ID_INDEX" : 4
}
Loading