Skip to content

Commit

Permalink
Merge pull request #584 from ibi-group/shared-stops-validator-repair
Browse files Browse the repository at this point in the history
Repair Shared Stops Validator
  • Loading branch information
miles-grant-ibigroup authored Jan 16, 2024
2 parents d7ec621 + 509cec4 commit e14d5a2
Show file tree
Hide file tree
Showing 9 changed files with 194 additions and 37 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/maven.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ jobs:
java-version: 19
distribution: 'temurin'
# Install node 14 for running e2e tests (and for maven-semantic-release).
- name: Use Node.js 18.x
- name: Use Node.js 20.x
uses: actions/setup-node@v1
with:
node-version: 18.x
node-version: 20.x
- name: Start MongoDB
uses: supercharge/[email protected]
with:
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@
<groupId>com.github.ibi-group</groupId>
<artifactId>gtfs-lib</artifactId>
<!-- Latest dev build on jitpack.io -->
<version>3fa0da9</version>
<version>761456fdc66bc1b72fc860b5e5853b0f2904e009</version>
<!-- Exclusions added in order to silence SLF4J warnings about multiple bindings:
http://www.slf4j.org/codes.html#multiple_bindings
-->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.conveyal.datatools.manager.jobs.validation;

import java.util.Map;

public enum SharedStopsHeader {

STOP_GROUP_ID_INDEX("stop_group_id"),

STOP_ID_INDEX("stop_id"),

IS_PRIMARY_INDEX("is_primary"),

FEED_ID_INDEX("feed_id");

public final String headerName;

SharedStopsHeader(String name) {
this.headerName = name;
}

public String getHeaderName() {
return headerName;
}

public static SharedStopsHeader fromValue(String headerName) {
for (SharedStopsHeader sharedStopsHeader : SharedStopsHeader.values()) {
if (sharedStopsHeader.getHeaderName().equalsIgnoreCase(headerName)) {
return sharedStopsHeader;
}
}
throw new UnsupportedOperationException(String.format("Unknown shared stops header: %s", headerName));
}

public static boolean hasRequiredHeaders(Map<SharedStopsHeader, Integer> headerIndices) {
return headerIndices.size() == SharedStopsHeader.values().length;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Map;

import static com.conveyal.datatools.manager.jobs.validation.SharedStopsHeader.STOP_GROUP_ID_INDEX;
import static com.conveyal.datatools.manager.jobs.validation.SharedStopsHeader.STOP_ID_INDEX;

public class SharedStopsValidator extends FeedValidator {
private static final Logger LOG = LoggerFactory.getLogger(SharedStopsValidator.class);
Expand All @@ -42,13 +44,53 @@ public SharedStopsValidator buildSharedStopsValidator(Feed feed, SQLErrorStorage
}
return new SharedStopsValidator(feed, errorStorage, this.project, this.feedId);
}

public SharedStopsValidator(Feed feed, SQLErrorStorage errorStorage, Project project, String feedId) {
super(feed, errorStorage);
this.feed = feed;
this.project = project;
this.feedId = feedId;
}

public static String[] getHeaders(CsvReader configReader) throws IOException {
configReader.setHeaders(configReader.getRawRecord().split(","));
return configReader.getHeaders();
}

public static Map<SharedStopsHeader, Integer> getHeaderIndices(CsvReader configReader) {
Map<SharedStopsHeader, Integer> headerIndices = new EnumMap<>(SharedStopsHeader.class);

try {
configReader.readRecord();
String[] headers = getHeaders(configReader);

for (int i = 0; i < headers.length; i++) {
String header = headers[i];
switch (SharedStopsHeader.fromValue(header)) {
case STOP_GROUP_ID_INDEX:
headerIndices.put(STOP_GROUP_ID_INDEX, i);
break;
case FEED_ID_INDEX:
headerIndices.put(SharedStopsHeader.FEED_ID_INDEX, i);
break;
case STOP_ID_INDEX:
headerIndices.put(STOP_ID_INDEX, i);
break;
case IS_PRIMARY_INDEX:
headerIndices.put(SharedStopsHeader.IS_PRIMARY_INDEX, i);
break;
}
}

if (!SharedStopsHeader.hasRequiredHeaders(headerIndices)) {
throw new RuntimeException("shared_stops.csv is missing headers!");
}
} catch (IOException e) {
throw new RuntimeException("shared_stops.csv was invalid!", e);
}
return headerIndices;
}

@Override
public void validate() {
String config = project.sharedStopsConfig;
Expand All @@ -58,12 +100,6 @@ public void validate() {

CsvReader configReader = CsvReader.parse(config);

// TODO: pull indicies from the CSV header
final int STOP_GROUP_ID_INDEX = 0;
final int STOP_ID_INDEX = 2;
final int IS_PRIMARY_INDEX = 3;
final int FEED_ID_INDEX = 1;

// Build list of stop Ids.
List<String> stopIds = new ArrayList<>();
List<Stop> stops = new ArrayList<>();
Expand All @@ -73,54 +109,59 @@ public void validate() {
stopIds.add(stop.stop_id);
}

// Initialize hashmaps to hold duplication info
HashMap<String, Set<String>> stopGroupStops = new HashMap<>();
// Initialize hashmaps to hold duplication info.
HashSet<String> seenStopIds = new HashSet<>();
HashSet<String> stopGroupsWithPrimaryStops = new HashSet<>();

try {
Map<SharedStopsHeader, Integer> headerIndices = getHeaderIndices(configReader);
while (configReader.readRecord()) {
String stopGroupId = configReader.get(STOP_GROUP_ID_INDEX);
String stopId = configReader.get(STOP_ID_INDEX);
String sharedStopFeedId = configReader.get(FEED_ID_INDEX);
String stopGroupId = configReader.get(headerIndices.get(STOP_GROUP_ID_INDEX));
String stopId = configReader.get(headerIndices.get(STOP_ID_INDEX));
String sharedStopFeedId = configReader.get(headerIndices.get(SharedStopsHeader.FEED_ID_INDEX));

if (stopId.equals("stop_id")) {
// Swallow header row
if (stopId.equals(STOP_ID_INDEX.headerName)) {
// Swallow header row.
continue;
}

stopGroupStops.putIfAbsent(stopGroupId, new HashSet<>());
Set<String> seenStopIds = stopGroupStops.get(stopGroupId);

// Check for SS_01 (stop id appearing in multiple stop groups)
// Check for SS_01 (stop id appearing in multiple stop groups).
// Make sure this error is only returned if we are inside the feed that is being checked.
if (seenStopIds.contains(stopId)) {
registerError(stops
if (feedId.equals(sharedStopFeedId)) {
registerError(stops
.stream()
.filter(stop -> stop.stop_id.equals(stopId))
.findFirst()
.orElse(stops.get(0)), NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS
);
.orElse(new Stop()), NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS
);
}
} else {
seenStopIds.add(stopId);
}

// Check for SS_02 (multiple primary stops per stop group)
if (stopGroupsWithPrimaryStops.contains(stopGroupId)) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS, stopGroupId));
} else if (configReader.get(IS_PRIMARY_INDEX).equals("true")) {
stopGroupsWithPrimaryStops.add(stopGroupId);
// Check for SS_02 (multiple primary stops per stop group).
if (
configReader.get(headerIndices.get(SharedStopsHeader.IS_PRIMARY_INDEX)).equals("1") ||
configReader.get(headerIndices.get(SharedStopsHeader.IS_PRIMARY_INDEX)).equals("true")
) {
if (stopGroupsWithPrimaryStops.contains(stopGroupId)) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS, stopGroupId));
} else {
stopGroupsWithPrimaryStops.add(stopGroupId);
}
}

// Check for SS_03 (stop_id referenced doesn't exist)
// Make sure this error is only returned if we are inside the feed that is being checked
// Check for SS_03 (stop_id referenced doesn't exist).
// Make sure this error is only returned if we are inside the feed that is being checked.
if (feedId.equals(sharedStopFeedId) && !stopIds.contains(stopId)) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_ENTITY_DOES_NOT_EXIST, stopId));
}
}
} catch (IOException e) { LOG.error(e.toString()); }
finally {
} catch (IOException e) {
LOG.error("Unable to validate shared stops.", e);
} finally {
configReader.close();
}


}
}
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ public boolean hasBlockingIssuesForPublishing() {
NewGTFSErrorType.TABLE_IN_SUBDIRECTORY,
NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS,
NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS,
NewGTFSErrorType.SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS
NewGTFSErrorType.SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS
));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package com.conveyal.datatools.manager.jobs.validation;

import com.conveyal.datatools.UnitTest;
import com.csvreader.CsvReader;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.stream.Stream;

import static com.zenika.snapshotmatcher.SnapshotMatcher.matchesSnapshot;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

class SharedStopsValidatorTest extends UnitTest {

@ParameterizedTest
@MethodSource("createCSVHeaders")
void canHandleVariousCorrectCSV(String headers) {
assertThat(SharedStopsValidator.getHeaderIndices(CsvReader.parse(headers)), matchesSnapshot());
}

private static Stream<String> createCSVHeaders() {
return Stream.of(
"is_primary,feed_id,stop_id,stop_group_id",
"feed_id,stop_id,stop_group_id,is_primary",
"feed_id,is_primary,stop_group_id,stop_id,feed_id"
);
}

@ParameterizedTest
@MethodSource("createInvalidCSVArguments")
void attemptToParseInvalidCSV(InvalidCSVArgument invalidCSVArgument) {
CsvReader configReader = CsvReader.parse(invalidCSVArgument.csv);
Exception exception = assertThrows(RuntimeException.class, () -> SharedStopsValidator.getHeaderIndices(configReader));
assertEquals(invalidCSVArgument.expectedError, exception.getMessage());
}

private static Stream<InvalidCSVArgument> createInvalidCSVArguments() {
String invalid = "Unknown shared stops header: ";
String missing = "shared_stops.csv is missing headers!";
return Stream.of(
new InvalidCSVArgument("this is a great string, but it's not a shared_stops csv!", invalid + "this is a great string"),
new InvalidCSVArgument("", invalid),
new InvalidCSVArgument("is_primary,stop_id", missing),
new InvalidCSVArgument("is_primary,feed_id,,stop_group_id", invalid),
new InvalidCSVArgument("is_primary,feed_id,stop_group_id", missing),
new InvalidCSVArgument("feed_id, is_primary, stop_group_id,stop_id", invalid + " is_primary")
);
}

private static class InvalidCSVArgument {
public String csv;
public String expectedError;

public InvalidCSVArgument(String csv, String expectedError) {
this.csv = csv;
this.expectedError = expectedError;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"STOP_GROUP_ID_INDEX" : 3,
"STOP_ID_INDEX" : 2,
"IS_PRIMARY_INDEX" : 0,
"FEED_ID_INDEX" : 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"STOP_GROUP_ID_INDEX" : 2,
"STOP_ID_INDEX" : 1,
"IS_PRIMARY_INDEX" : 3,
"FEED_ID_INDEX" : 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"STOP_GROUP_ID_INDEX" : 2,
"STOP_ID_INDEX" : 3,
"IS_PRIMARY_INDEX" : 1,
"FEED_ID_INDEX" : 4
}

0 comments on commit e14d5a2

Please sign in to comment.