Skip to content

Commit

Permalink
fix(gtfs+): properly detect rows with incorrect number of fields
Browse files Browse the repository at this point in the history
  • Loading branch information
landonreed committed Aug 1, 2019
1 parent 8211f5a commit de54615
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.conveyal.gtfs.GTFSFeed;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import org.apache.commons.io.input.BOMInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -100,7 +101,10 @@ public static GtfsPlusValidation validate(String feedVersionId) throws Exception
if (tableNode.get("name").asText().equals(entry.getName())) {
LOG.info("Validating GTFS+ table: " + entry.getName());
gtfsPlusTableCount++;
validateTable(validation.issues, tableNode, zipFile.getInputStream(entry), gtfsFeed);
// Skip any byte order mark that may be present. Files must be UTF-8,
// but the GTFS spec says that "files that include the UTF byte order mark are acceptable".
InputStream bis = new BOMInputStream(zipFile.getInputStream(entry));
validateTable(validation.issues, tableNode, bis, gtfsFeed);
}
}
}
Expand All @@ -124,7 +128,7 @@ private static void validateTable(
String line = in.readLine();
String[] inputHeaders = line.split(",");
List<String> fieldList = Arrays.asList(inputHeaders);
JsonNode[] fieldsFounds = new JsonNode[inputHeaders.length];
JsonNode[] fieldsFound = new JsonNode[inputHeaders.length];
JsonNode specFields = specTable.get("fields");
// Iterate over spec fields and check that there are no missing required fields.
for (int i = 0; i < specFields.size(); i++) {
Expand All @@ -133,21 +137,34 @@ private static void validateTable(
int index = fieldList.indexOf(fieldName);
if (index != -1) {
// Add spec field for each field found.
fieldsFounds[index] = specField;
fieldsFound[index] = specField;
} else if (isRequired(specField)) {
// Spec field was not found among the input headers and it is required, so report the missing column.
issues.add(new ValidationIssue(tableId, fieldName, -1, "Required column missing."));
}
}
// Iterate over each row and validate each field value.
int rowIndex = 0;
int rowsWithWrongNumberOfColumns = 0;
while ((line = in.readLine()) != null) {
String[] values = line.split(Consts.COLUMN_SPLIT, -1);
for (int v = 0; v < values.length; v++) {
validateTableValue(issues, tableId, rowIndex, values[v], fieldsFounds[v], gtfsFeed);
// First, check that row has the correct number of fields.
if (values.length != fieldsFound.length) {
rowsWithWrongNumberOfColumns++;
}
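// Validate each value in the row. Note: we iterate over the fields rather than the values because a row may be
// missing columns, and we still want to validate each missing value (e.g., to flag a missing required field).
// Values beyond the number of headers are not validated individually; such rows are only counted above.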
for (int f = 0; f < fieldsFound.length; f++) {
// If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception.
String val = f < values.length ? values[f] : null;
validateTableValue(issues, tableId, rowIndex, val, fieldsFound[f], gtfsFeed);
}
rowIndex++;
}
// Add issue for wrong number of columns after processing all rows.
if (rowsWithWrongNumberOfColumns > 0) {
issues.add(new ValidationIssue(tableId, null, -1, rowsWithWrongNumberOfColumns + " row(s) do not contain the same number of fields as there are headers. (File may need to be edited manually.)"));
}
}

/** Determine if a GTFS+ spec field is required. */
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/gtfs/gtfsplus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
- name: realtime_trip_id
required: true
inputType: TEXT
maxLength: 15
# maxLength: 15
columnWidth: 6
helpContent: Corresponding trip_id provided in real-time feed for MTC.

Expand Down Expand Up @@ -324,7 +324,7 @@
- name: service_description
required: true
inputType: TEXT
maxLength: 30
maxLength: 250
helpContent: Description of the service, as it should appear on 511.org such as Weekdays, Sunday/Holiday

- id: farezone_attributes
Expand Down

0 comments on commit de54615

Please sign in to comment.