From 9db43dc59e43e24d917c7b150b3a304240b49574 Mon Sep 17 00:00:00 2001 From: Armin Samii Date: Mon, 17 Jun 2024 13:17:45 -0400 Subject: [PATCH] allow missing CVR headers (#818) * allow missing CVR headers * disallow null candidate names * split missing header into new test * spotbugs fix * fix broken test * fix bad merge * Nit and reformatting. --------- Co-authored-by: Armin Samii Co-authored-by: HEdingfield --- .../network/brightspots/rcv/CsvCvrReader.java | 75 ++++++++----- .../brightspots/rcv/TabulatorSession.java | 36 +++--- .../brightspots/rcv/TabulatorTests.java | 6 + .../csv_missing_header_test_config.json | 53 +++++++++ .../csv_missing_header_test_cvr.csv | 27 +++++ ...v_missing_header_test_expected_summary.csv | 32 ++++++ ..._missing_header_test_expected_summary.json | 103 ++++++++++++++++++ 7 files changed, 286 insertions(+), 46 deletions(-) create mode 100644 src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_config.json create mode 100644 src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_cvr.csv create mode 100644 src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.csv create mode 100644 src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.json diff --git a/src/main/java/network/brightspots/rcv/CsvCvrReader.java b/src/main/java/network/brightspots/rcv/CsvCvrReader.java index 33ab4e10..43b515a9 100644 --- a/src/main/java/network/brightspots/rcv/CsvCvrReader.java +++ b/src/main/java/network/brightspots/rcv/CsvCvrReader.java @@ -17,6 +17,8 @@ package network.brightspots.rcv; +import static network.brightspots.rcv.Utils.isNullOrBlank; + import java.io.FileInputStream; import java.io.IOException; import java.nio.charset.Charset; @@ -49,15 +51,10 @@ public String readerName() { public List readCandidateListFromCvr(List castVoteRecords) throws IOException { try (FileInputStream inputStream = new FileInputStream(Path.of(cvrPath).toFile())) { - CSVParser parser = - CSVParser.parse( - inputStream, - Charset.defaultCharset(), - CSVFormat.Builder.create().setHeader().build()); - List rawCandidateNames = parser.getHeaderNames(); - // Split rawCandidateNames from firstVoteColumnIndex to the end - return new ArrayList<>(rawCandidateNames.subList( - firstVoteColumnIndex, rawCandidateNames.size())); + return getCandidateNamesAndInitializeParser(getCsvParser(inputStream)); + } catch (CastVoteRecord.CvrParseException exception) { + Logger.severe("Error reading candidate names:\n%s", exception); + throw new IOException(exception); } catch (IOException exception) { Logger.severe("Error parsing cast vote record:\n%s", exception); throw exception; @@ -69,15 +66,9 @@ public List readCandidateListFromCvr(List castVoteRecord void readCastVoteRecords(List castVoteRecords) throws CastVoteRecord.CvrParseException, IOException { try (FileInputStream inputStream = new FileInputStream(Path.of(cvrPath).toFile())) { - CSVParser parser = - CSVParser.parse( - inputStream, - Charset.defaultCharset(), - CSVFormat.Builder.create().setHeader().build()); - List candidateIds = parser.getHeaderNames(); - int undeclaredWriteInColumn = candidateIds.indexOf(source.getUndeclaredWriteInLabel()); - - parser.stream().skip(firstVoteRowIndex); + CSVParser parser = getCsvParser(inputStream); + List candidateNames = getCandidateNamesAndInitializeParser(parser); + int undeclaredWriteInColumn = candidateNames.indexOf(source.getUndeclaredWriteInLabel()); int index = 0; for (CSVRecord csvRecord : parser) { @@ -85,7 +76,7 @@ void readCastVoteRecords(List castVoteRecords) ArrayList> rankings = new ArrayList<>(); for (int col = firstVoteColumnIndex; col < csvRecord.size(); col++) { String rankAsString = csvRecord.get(col); - if (rankAsString.isBlank()) { + if (isNullOrBlank(rankAsString)) { continue; } int rankAsInt; @@ -99,20 +90,18 @@ void readCastVoteRecords(List castVoteRecords) throw new CastVoteRecord.CvrParseException(); } - String candidateId = candidateIds.get(col); - if (col == undeclaredWriteInColumn) { - candidateId = Tabulator.UNDECLARED_WRITE_IN_OUTPUT_LABEL; - } + int candidateIndex = col - firstVoteColumnIndex; + String candidateId = + candidateIndex == undeclaredWriteInColumn + ? Tabulator.UNDECLARED_WRITE_IN_OUTPUT_LABEL + : candidateNames.get(candidateIndex); rankings.add(new Pair<>(rankAsInt, candidateId)); } // create the new CastVoteRecord - CastVoteRecord newCvr = new CastVoteRecord( - Integer.toString(index), - "no supplied ID", - "no precinct", - "no batch ID", - rankings); + CastVoteRecord newCvr = + new CastVoteRecord( + Integer.toString(index), "no supplied ID", "no precinct", "no batch ID", rankings); castVoteRecords.add(newCvr); } } catch (IOException exception) { @@ -120,4 +109,32 @@ void readCastVoteRecords(List castVoteRecords) throw exception; } } + + private CSVParser getCsvParser(FileInputStream inputStream) throws IOException { + CSVFormat format = + CSVFormat.Builder.create().setAllowMissingColumnNames(true).setNullString("").build(); + return CSVParser.parse(inputStream, Charset.defaultCharset(), format); + } + + /** + * This must be called before CVRs are read. It will return candidate names and skip the + * appropriate number of rows. + */ + private List getCandidateNamesAndInitializeParser(CSVParser parser) + throws CastVoteRecord.CvrParseException { + List candidateNames = new ArrayList<>(); + CSVRecord csvRecord = parser.iterator().next(); + for (int col = firstVoteColumnIndex; col < csvRecord.size(); col++) { + String candidateName = csvRecord.get(col); + if (isNullOrBlank(candidateName)) { + Logger.severe("Candidate name at the top of column %d cannot be empty!", col); + throw new CastVoteRecord.CvrParseException(); + } + candidateNames.add(candidateName); + } + + parser.stream().skip(firstVoteRowIndex - 1); + + return candidateNames; + } } diff --git a/src/main/java/network/brightspots/rcv/TabulatorSession.java b/src/main/java/network/brightspots/rcv/TabulatorSession.java index 2e4e01c5..d78b02f4 100644 --- a/src/main/java/network/brightspots/rcv/TabulatorSession.java +++ b/src/main/java/network/brightspots/rcv/TabulatorSession.java @@ -396,27 +396,29 @@ private LoadedCvrData parseCastVoteRecords(ContestConfig config, Progress progre progress.markFileRead(); } - // Output the RCTab-CSV CVR - try { - ResultsWriter writer = - new ResultsWriter().setContestConfig(config).setTimestampString(timestampString); - this.convertedFilePath = - writer.writeRctabCvrCsv( - castVoteRecords, - cvrSourceData, - config.getOutputDirectory()); - } catch (IOException exception) { - // error already logged in ResultsWriter - } - if (encounteredSourceProblem) { Logger.severe("Parsing cast vote records failed!"); castVoteRecords = null; - } else if (castVoteRecords.isEmpty()) { - Logger.severe("No cast vote records found!"); - castVoteRecords = null; } else { - Logger.info("Parsed %d cast vote records successfully.", castVoteRecords.size()); + if (castVoteRecords.isEmpty()) { + Logger.severe("No cast vote records found!"); + castVoteRecords = null; + } else { + Logger.info("Parsed %d cast vote records successfully.", castVoteRecords.size()); + + // Output the RCTab-CSV CVR + try { + ResultsWriter writer = + new ResultsWriter().setContestConfig(config).setTimestampString(timestampString); + this.convertedFilePath = + writer.writeRctabCvrCsv( + castVoteRecords, + cvrSourceData, + config.getOutputDirectory()); + } catch (IOException exception) { + // error already logged in ResultsWriter + } + } } if (castVoteRecords == null) { diff --git a/src/test/java/network/brightspots/rcv/TabulatorTests.java b/src/test/java/network/brightspots/rcv/TabulatorTests.java index 908b74e0..a17d1a43 100644 --- a/src/test/java/network/brightspots/rcv/TabulatorTests.java +++ b/src/test/java/network/brightspots/rcv/TabulatorTests.java @@ -831,6 +831,12 @@ void genericCsvTest() { runTabulationTest("generic_csv_test"); } + @Test + @DisplayName("CSV missing header test") + void csvMissingHeaderTest() { + runTabulationTest("csv_missing_header_test"); + } + @Test @DisplayName("no one meets minimum test") void noOneMeetsMinimumTest() { diff --git a/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_config.json b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_config.json new file mode 100644 index 00000000..9cdfc1c0 --- /dev/null +++ b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_config.json @@ -0,0 +1,53 @@ +{ + "tabulatorVersion": "TEST", + "outputSettings": { + "contestName": "Missing Header CSV Test", + "outputDirectory": "output", + "contestDate": "2024-06-14", + "contestJurisdiction": "Portland, ME", + "contestOffice": "Mayor", + "tabulateByPrecinct": false, + "generateCdfJson": false + }, + "cvrFileSources": [ + { + "filePath" : "csv_missing_header_test_cvr.csv", + "firstVoteColumnIndex" : "2", + "firstVoteRowIndex" : "2", + "provider" : "genericCsv", + "undeclaredWriteInLabel": "Write-In" + } ], + "candidates" : [ { + "name" : "Lettuce", + "code" : "", + "excluded" : false + }, { + "name" : "Broccoli", + "code" : "", + "excluded" : false + }, { + "name" : "Cucumber", + "code" : "", + "excluded" : false + }, { + "name" : "Cauliflower", + "code" : "", + "excluded" : false + } ], + "rules" : { + "tiebreakMode": "useCandidateOrder", + "overvoteRule": "exhaustImmediately", + "winnerElectionMode": "singleWinnerMajority", + "randomSeed": "", + "numberOfWinners": "1", + "decimalPlacesForVoteArithmetic": "4", + "minimumVoteThreshold": "0", + "maxSkippedRanksAllowed": "1", + "maxRankingsAllowed": "15", + "nonIntegerWinningThreshold": false, + "hareQuota": false, + "batchElimination": true, + "exhaustOnDuplicateCandidate": false, + "rulesDescription": "Maine Rules" + } +} diff --git a/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_cvr.csv b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_cvr.csv new file mode 100644 index 00000000..cf710ec2 --- /dev/null +++ b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_cvr.csv @@ -0,0 +1,27 @@ +,Lettuce,Broccoli,Cucumber,Cauliflower,Write-In +R_3QEQ85E7gV2DuyG,,2,1,, +R_1ikP1jSJLdWUEKs,2,3,4,1, +R_22G3akvxq7ggl01,3,5,2,1,4 +R_2VsGZJKYqjDtYQ7,4,2,3,1, +R_300GN9Myv0Dad3L,1,3,2,, +R_21yNGL1wKB5ZCPL,4,2,5,3,1 +R_2SdI8sTvnYmvnl8,4,2,1,3,5 +R_2qdAEAZ9ODwFYYx,1,4,2,3, +R_2f6w0fSg9GCXM9I,1,3,2,5,4 +R_3UkrSZJnGxKaYW5,1,2,3,5,4 +R_2rYkKGQ75Qu6Vxr,4,5,2,3,1 +R_3kgFqD03AqBVVZE,3,2,1,4, +R_3fYPpZQbDze9MRx,4,2,3,1, +R_BD73UrqgXrIpnfr,2,3,1,4, +R_2TYqj3GcBmz3FFl,4,2,1,3, +R_AEB4v8Hohce2nv3,3,4,1,5,2 +R_Aiy5eILJk4S5tIt,2,1,3,4, +R_2xDiLvKgBaIxeFv,2,3,4,5,1 +R_124lgkhlQv9Ullj,5,4,3,2,1 +R_21EC0Nti8aqKdm9,5,3,2,4,1 +R_1pDpfl9azbgtFrC,1,2,3,4,5 +R_3EXs2zblwxj5olU,4,3,1,2, +R_1I4UVe24fpZcqPY,2,4,5,3,1 +R_27v7BD6blfCf7BQ,3,1,4,5,2 +R_sUm5EhzNgJ9Fy7f,2,1,3,5,4 +R_33dwoHsyajiIzbF,4,2,1,3, diff --git a/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.csv b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.csv new file mode 100644 index 00000000..d009fe5f --- /dev/null +++ b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.csv @@ -0,0 +1,32 @@ +Contest Information +Generated By,RCTab 1.3.999 +CSV Format Version,1 +Type of Election,Single-Winner +Contest,Missing Header CSV Test +Jurisdiction,"Portland, ME" +Office,Mayor +Date,2024-06-14 +Winner(s),Cucumber +Final Threshold,14 + +Contest Summary +Number to be Elected,1 +Number of Candidates,5 +Total Number of Ballots,26 +Number of Undervotes,0 + +Rounds,Round 1 Votes,% of vote,transfer,Round 2 Votes,% of vote,transfer,Round 3 Votes,% of vote,transfer,Round 4 Votes,% of vote,transfer +Eliminated,Undeclared Write-ins,,,Broccoli,,,Cauliflower,,,,, +Elected,,,,,,,,,,Cucumber,, +Cucumber,8,30.76%,2,10,38.46%,0,10,38.46%,4,14,53.84%,0 +Lettuce,5,19.23%,2,7,26.92%,3,10,38.46%,2,12,46.15%,0 +Cauliflower,4,15.38%,1,5,19.23%,1,6,23.07%,-6,0,0.0%,0 +Broccoli,3,11.53%,1,4,15.38%,-4,0,0.0%,0,0,0.0%,0 +Undeclared Write-ins,6,23.07%,-6,0,0.0%,0,0,0.0%,0,0,0.0%,0 +Active Ballots,26,,,26,,,26,,,26,, +Current Round Threshold,14,,,14,,,14,,,14,, +Inactive Ballots by Overvotes,0,,0,0,,0,0,,0,0,,0 +Inactive Ballots by Skipped Rankings,0,,0,0,,0,0,,0,0,,0 +Inactive Ballots by Exhausted Choices,0,,0,0,,0,0,,0,0,,0 +Inactive Ballots by Repeated Rankings,0,,0,0,,0,0,,0,0,,0 +Inactive Ballots Total,0,,0,0,,0,0,,0,0,,0 diff --git a/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.json b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.json new file mode 100644 index 00000000..43bc4589 --- /dev/null +++ b/src/test/resources/network/brightspots/rcv/test_data/csv_missing_header_test/csv_missing_header_test_expected_summary.json @@ -0,0 +1,103 @@ +{ + "config" : { + "contest" : "Missing Header CSV Test", + "date" : "2024-06-14", + "generatedBy" : "RCTab 1.3.999", + "jurisdiction" : "Portland, ME", + "office" : "Mayor" + }, + "jsonFormatVersion" : "1", + "results" : [ { + "inactiveBallots" : { + "exhaustedChoices" : "0", + "overvotes" : "0", + "repeatedRankings" : "0", + "skippedRankings" : "0" + }, + "round" : 1, + "tally" : { + "Broccoli" : "3", + "Cauliflower" : "4", + "Cucumber" : "8", + "Lettuce" : "5", + "Undeclared Write-ins" : "6" + }, + "tallyResults" : [ { + "eliminated" : "Undeclared Write-ins", + "transfers" : { + "Broccoli" : "1", + "Cauliflower" : "1", + "Cucumber" : "2", + "Lettuce" : "2" + } + } ], + "threshold" : "14" + }, { + "inactiveBallots" : { + "exhaustedChoices" : "0", + "overvotes" : "0", + "repeatedRankings" : "0", + "skippedRankings" : "0" + }, + "round" : 2, + "tally" : { + "Broccoli" : "4", + "Cauliflower" : "5", + "Cucumber" : "10", + "Lettuce" : "7" + }, + "tallyResults" : [ { + "eliminated" : "Broccoli", + "transfers" : { + "Cauliflower" : "1", + "Lettuce" : "3" + } + } ], + "threshold" : "14" + }, { + "inactiveBallots" : { + "exhaustedChoices" : "0", + "overvotes" : "0", + "repeatedRankings" : "0", + "skippedRankings" : "0" + }, + "round" : 3, + "tally" : { + "Cauliflower" : "6", + "Cucumber" : "10", + "Lettuce" : "10" + }, + "tallyResults" : [ { + "eliminated" : "Cauliflower", + "transfers" : { + "Cucumber" : "4", + "Lettuce" : "2" + } + } ], + "threshold" : "14" + }, { + "inactiveBallots" : { + "exhaustedChoices" : "0", + "overvotes" : "0", + "repeatedRankings" : "0", + "skippedRankings" : "0" + }, + "round" : 4, + "tally" : { + "Cucumber" : "14", + "Lettuce" : "12" + }, + "tallyResults" : [ { + "elected" : "Cucumber", + "transfers" : { } + } ], + "threshold" : "14" + } ], + "summary" : { + "finalThreshold" : "14", + "numCandidates" : 5, + "numWinners" : 1, + "totalNumBallots" : "26", + "undervotes" : 0 + } +}