diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 7fe151840..2616dcec9 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -33,10 +33,10 @@ jobs: java-version: 19 distribution: 'temurin' # Install node 14 for running e2e tests (and for maven-semantic-release). - - name: Use Node.js 18.x + - name: Use Node.js 20.x uses: actions/setup-node@v1 with: - node-version: 18.x + node-version: 20.x - name: Start MongoDB uses: supercharge/mongodb-github-action@1.3.0 with: diff --git a/pom.xml b/pom.xml index 31fbf057d..3e478f354 100644 --- a/pom.xml +++ b/pom.xml @@ -272,7 +272,7 @@ com.github.ibi-group gtfs-lib - 3fa0da9 + 761456fdc66bc1b72fc860b5e5853b0f2904e009 diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsHeader.java b/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsHeader.java new file mode 100644 index 000000000..9d63ae7ce --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsHeader.java @@ -0,0 +1,37 @@ +package com.conveyal.datatools.manager.jobs.validation; + +import java.util.Map; + +public enum SharedStopsHeader { + + STOP_GROUP_ID_INDEX("stop_group_id"), + + STOP_ID_INDEX("stop_id"), + + IS_PRIMARY_INDEX("is_primary"), + + FEED_ID_INDEX("feed_id"); + + public final String headerName; + + SharedStopsHeader(String name) { + this.headerName = name; + } + + public String getHeaderName() { + return headerName; + } + + public static SharedStopsHeader fromValue(String headerName) { + for (SharedStopsHeader sharedStopsHeader : SharedStopsHeader.values()) { + if (sharedStopsHeader.getHeaderName().equalsIgnoreCase(headerName)) { + return sharedStopsHeader; + } + } + throw new UnsupportedOperationException(String.format("Unknown shared stops header: %s", headerName)); + } + + public static boolean hasRequiredHeaders(Map headerIndices) { + return headerIndices.size() == SharedStopsHeader.values().length; + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidator.java b/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidator.java index d608fe249..a99e686f2 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidator.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidator.java @@ -13,11 +13,13 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; +import java.util.EnumMap; import java.util.HashSet; import java.util.List; -import java.util.Set; +import java.util.Map; +import static com.conveyal.datatools.manager.jobs.validation.SharedStopsHeader.STOP_GROUP_ID_INDEX; +import static com.conveyal.datatools.manager.jobs.validation.SharedStopsHeader.STOP_ID_INDEX; public class SharedStopsValidator extends FeedValidator { private static final Logger LOG = LoggerFactory.getLogger(SharedStopsValidator.class); @@ -42,6 +44,7 @@ public SharedStopsValidator buildSharedStopsValidator(Feed feed, SQLErrorStorage } return new SharedStopsValidator(feed, errorStorage, this.project, this.feedId); } + public SharedStopsValidator(Feed feed, SQLErrorStorage errorStorage, Project project, String feedId) { super(feed, errorStorage); this.feed = feed; @@ -49,6 +52,45 @@ public SharedStopsValidator(Feed feed, SQLErrorStorage errorStorage, Project pro this.feedId = feedId; } + public static String[] getHeaders(CsvReader configReader) throws IOException { + configReader.setHeaders(configReader.getRawRecord().split(",")); + return configReader.getHeaders(); + } + + public static Map getHeaderIndices(CsvReader configReader) { + Map headerIndices = new EnumMap<>(SharedStopsHeader.class); + + try { + configReader.readRecord(); + String[] headers = getHeaders(configReader); + + for (int i = 0; i < headers.length; i++) { + String header = headers[i]; + switch (SharedStopsHeader.fromValue(header)) { + case STOP_GROUP_ID_INDEX: + headerIndices.put(STOP_GROUP_ID_INDEX, i); + break; + case FEED_ID_INDEX: + headerIndices.put(SharedStopsHeader.FEED_ID_INDEX, i); + break; + case STOP_ID_INDEX: + headerIndices.put(STOP_ID_INDEX, i); + break; + case IS_PRIMARY_INDEX: + headerIndices.put(SharedStopsHeader.IS_PRIMARY_INDEX, i); + break; + } + } + + if (!SharedStopsHeader.hasRequiredHeaders(headerIndices)) { + throw new RuntimeException("shared_stops.csv is missing headers!"); + } + } catch (IOException e) { + throw new RuntimeException("shared_stops.csv was invalid!", e); + } + return headerIndices; + } + @Override public void validate() { String config = project.sharedStopsConfig; @@ -58,12 +100,6 @@ public void validate() { CsvReader configReader = CsvReader.parse(config); - // TODO: pull indicies from the CSV header - final int STOP_GROUP_ID_INDEX = 0; - final int STOP_ID_INDEX = 2; - final int IS_PRIMARY_INDEX = 3; - final int FEED_ID_INDEX = 1; - // Build list of stop Ids. List stopIds = new ArrayList<>(); List stops = new ArrayList<>(); @@ -73,54 +109,59 @@ public void validate() { stopIds.add(stop.stop_id); } - // Initialize hashmaps to hold duplication info - HashMap> stopGroupStops = new HashMap<>(); + // Initialize hashmaps to hold duplication info. + HashSet seenStopIds = new HashSet<>(); HashSet stopGroupsWithPrimaryStops = new HashSet<>(); try { + Map headerIndices = getHeaderIndices(configReader); while (configReader.readRecord()) { - String stopGroupId = configReader.get(STOP_GROUP_ID_INDEX); - String stopId = configReader.get(STOP_ID_INDEX); - String sharedStopFeedId = configReader.get(FEED_ID_INDEX); + String stopGroupId = configReader.get(headerIndices.get(STOP_GROUP_ID_INDEX)); + String stopId = configReader.get(headerIndices.get(STOP_ID_INDEX)); + String sharedStopFeedId = configReader.get(headerIndices.get(SharedStopsHeader.FEED_ID_INDEX)); - if (stopId.equals("stop_id")) { - // Swallow header row + if (stopId.equals(STOP_ID_INDEX.headerName)) { + // Swallow header row. continue; } - stopGroupStops.putIfAbsent(stopGroupId, new HashSet<>()); - Set seenStopIds = stopGroupStops.get(stopGroupId); - - // Check for SS_01 (stop id appearing in multiple stop groups) + // Check for SS_01 (stop id appearing in multiple stop groups). + // Make sure this error is only returned if we are inside the feed that is being checked. if (seenStopIds.contains(stopId)) { - registerError(stops + if (feedId.equals(sharedStopFeedId)) { + registerError(stops .stream() .filter(stop -> stop.stop_id.equals(stopId)) .findFirst() - .orElse(stops.get(0)), NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS - ); + .orElse(new Stop()), NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS + ); + } } else { seenStopIds.add(stopId); } - // Check for SS_02 (multiple primary stops per stop group) - if (stopGroupsWithPrimaryStops.contains(stopGroupId)) { - registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS, stopGroupId)); - } else if (configReader.get(IS_PRIMARY_INDEX).equals("true")) { - stopGroupsWithPrimaryStops.add(stopGroupId); + // Check for SS_02 (multiple primary stops per stop group). + if ( + configReader.get(headerIndices.get(SharedStopsHeader.IS_PRIMARY_INDEX)).equals("1") || + configReader.get(headerIndices.get(SharedStopsHeader.IS_PRIMARY_INDEX)).equals("true") + ) { + if (stopGroupsWithPrimaryStops.contains(stopGroupId)) { + registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS, stopGroupId)); + } else { + stopGroupsWithPrimaryStops.add(stopGroupId); + } } - // Check for SS_03 (stop_id referenced doesn't exist) - // Make sure this error is only returned if we are inside the feed that is being checked + // Check for SS_03 (stop_id referenced doesn't exist). + // Make sure this error is only returned if we are inside the feed that is being checked. if (feedId.equals(sharedStopFeedId) && !stopIds.contains(stopId)) { registerError(NewGTFSError.forFeed(NewGTFSErrorType.SHARED_STOP_GROUP_ENTITY_DOES_NOT_EXIST, stopId)); } } - } catch (IOException e) { LOG.error(e.toString()); } - finally { + } catch (IOException e) { + LOG.error("Unable to validate shared stops.", e); + } finally { configReader.close(); } - - } } diff --git a/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java b/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java index 060f8589b..071ff9556 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java +++ b/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java @@ -517,7 +517,7 @@ public boolean hasBlockingIssuesForPublishing() { NewGTFSErrorType.TABLE_IN_SUBDIRECTORY, NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS, NewGTFSErrorType.MULTIPLE_SHARED_STOPS_GROUPS, - NewGTFSErrorType.SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS + NewGTFSErrorType.SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS )); } diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest.java new file mode 100644 index 000000000..b8a944f71 --- /dev/null +++ b/src/test/java/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest.java @@ -0,0 +1,61 @@ +package com.conveyal.datatools.manager.jobs.validation; + +import com.conveyal.datatools.UnitTest; +import com.csvreader.CsvReader; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.stream.Stream; + +import static com.zenika.snapshotmatcher.SnapshotMatcher.matchesSnapshot; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class SharedStopsValidatorTest extends UnitTest { + + @ParameterizedTest + @MethodSource("createCSVHeaders") + void canHandleVariousCorrectCSV(String headers) { + assertThat(SharedStopsValidator.getHeaderIndices(CsvReader.parse(headers)), matchesSnapshot()); + } + + private static Stream createCSVHeaders() { + return Stream.of( + "is_primary,feed_id,stop_id,stop_group_id", + "feed_id,stop_id,stop_group_id,is_primary", + "feed_id,is_primary,stop_group_id,stop_id,feed_id" + ); + } + + @ParameterizedTest + @MethodSource("createInvalidCSVArguments") + void attemptToParseInvalidCSV(InvalidCSVArgument invalidCSVArgument) { + CsvReader configReader = CsvReader.parse(invalidCSVArgument.csv); + Exception exception = assertThrows(RuntimeException.class, () -> SharedStopsValidator.getHeaderIndices(configReader)); + assertEquals(invalidCSVArgument.expectedError, exception.getMessage()); + } + + private static Stream createInvalidCSVArguments() { + String invalid = "Unknown shared stops header: "; + String missing = "shared_stops.csv is missing headers!"; + return Stream.of( + new InvalidCSVArgument("this is a great string, but it's not a shared_stops csv!", invalid + "this is a great string"), + new InvalidCSVArgument("", invalid), + new InvalidCSVArgument("is_primary,stop_id", missing), + new InvalidCSVArgument("is_primary,feed_id,,stop_group_id", invalid), + new InvalidCSVArgument("is_primary,feed_id,stop_group_id", missing), + new InvalidCSVArgument("feed_id, is_primary, stop_group_id,stop_id", invalid + " is_primary") + ); + } + + private static class InvalidCSVArgument { + public String csv; + public String expectedError; + + public InvalidCSVArgument(String csv, String expectedError) { + this.csv = csv; + this.expectedError = expectedError; + } + } +} diff --git a/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-0.json b/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-0.json new file mode 100644 index 000000000..efafa6fbc --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-0.json @@ -0,0 +1,6 @@ +{ + "STOP_GROUP_ID_INDEX" : 3, + "STOP_ID_INDEX" : 2, + "IS_PRIMARY_INDEX" : 0, + "FEED_ID_INDEX" : 1 +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-1.json b/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-1.json new file mode 100644 index 000000000..0a5c3f7a5 --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-1.json @@ -0,0 +1,6 @@ +{ + "STOP_GROUP_ID_INDEX" : 2, + "STOP_ID_INDEX" : 1, + "IS_PRIMARY_INDEX" : 3, + "FEED_ID_INDEX" : 0 +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-2.json b/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-2.json new file mode 100644 index 000000000..351f7a0d6 --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/datatools/manager/jobs/validation/SharedStopsValidatorTest/canHandleVariousCorrectCSV-2.json @@ -0,0 +1,6 @@ +{ + "STOP_GROUP_ID_INDEX" : 2, + "STOP_ID_INDEX" : 3, + "IS_PRIMARY_INDEX" : 1, + "FEED_ID_INDEX" : 4 +} \ No newline at end of file