From dcbf6b5e513aee96d4a385e988af25853521d499 Mon Sep 17 00:00:00 2001 From: Mathew Ruberg Date: Mon, 9 Dec 2024 11:26:55 -0500 Subject: [PATCH] 898: Dominion CvrExport parsing (#899) * get all CvrExport_N.json files to parse * linter cleanup * linter cleanup 2 * more concise regex to find all dominion files and sort them consistently * complete rename of cvr_0 to cvr_6 * null check dominion cvr file list --------- Co-authored-by: yezr <8996546+yezr@users.noreply.github.com> --- .../brightspots/rcv/DominionCvrReader.java | 41 ++++++++++++------- .../{CvrExport_0.json => CvrExport_6.json} | 0 2 files changed, 27 insertions(+), 14 deletions(-) rename src/test/resources/network/brightspots/rcv/test_data/dominion_multi_file/dominion_multi_file_input_data/{CvrExport_0.json => CvrExport_6.json} (100%) diff --git a/src/main/java/network/brightspots/rcv/DominionCvrReader.java b/src/main/java/network/brightspots/rcv/DominionCvrReader.java index bc1912454..6ed6cf86b 100644 --- a/src/main/java/network/brightspots/rcv/DominionCvrReader.java +++ b/src/main/java/network/brightspots/rcv/DominionCvrReader.java @@ -25,6 +25,8 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -230,30 +232,41 @@ private void validateNamesAreInContest(List castVoteRecords) private void gatherCvrsForContest(List castVoteRecords, String contestIdToLoad) { try { Path singleCvrPath = Paths.get(cvrPath, CVR_EXPORT); - Path firstCvrPath = Paths.get(cvrPath, String.format(CVR_EXPORT_PATTERN, 0)); + if (singleCvrPath.toFile().exists()) { HashMap json = JsonParser.readFromFile(singleCvrPath.toString(), HashMap.class); parseCvrFile(json, castVoteRecords, contestIdToLoad); - } else if (firstCvrPath.toFile().exists()) { + } else { + // We are expecting multiple CvrExport_N.json files + String regexPath = CVR_EXPORT_PATTERN.replaceAll("%d", "\\\\d+"); + File cvrDirectory = new File(cvrPath); + File[] matchedCvrFileArray = cvrDirectory.listFiles((dir, name) -> name.matches(regexPath)); + + if (matchedCvrFileArray == null || matchedCvrFileArray.length == 0) { + String errorMessage = "Error parsing Dominion cast vote records:" + + " CvrExport.json file(s) not located"; + throw new FileNotFoundException(errorMessage); + } + + List matchedCvrFiles = Arrays.asList(matchedCvrFileArray); + matchedCvrFiles.sort(Comparator.comparing(File::getAbsolutePath)); + int recordsParsed = 0; + int filesParsed = 0; int recordsParsedAtLastLog = 0; - int cvrSequence = 0; - Path cvrFilePath = Paths.get(cvrPath, String.format(CVR_EXPORT_PATTERN, cvrSequence)); - while (cvrFilePath.toFile().exists()) { - HashMap json = JsonParser.readFromFile(cvrFilePath.toString(), HashMap.class); + + for (File file : matchedCvrFiles) { + HashMap json = JsonParser.readFromFile(file.toString(), HashMap.class); recordsParsed += parseCvrFile(json, castVoteRecords, contestIdToLoad); + filesParsed++; + if (recordsParsed - recordsParsedAtLastLog > 50000) { - Logger.info("Parsed %d records from %d files", recordsParsed, cvrSequence); + Logger.info("Parsed %d records from %d files", recordsParsed, filesParsed); recordsParsedAtLastLog = recordsParsed; } - cvrSequence++; - cvrFilePath = Paths.get(cvrPath, String.format(CVR_EXPORT_PATTERN, cvrSequence)); } - } else { - throw new FileNotFoundException( - String.format( - "Error parsing cast vote record: neither %s nor %s exists", - singleCvrPath, firstCvrPath)); + + Logger.info("Parsed %d total records from %d total files", recordsParsed, filesParsed); } } catch (FileNotFoundException | CvrParseException exception) { Logger.severe("Error parsing cast vote record:\n%s", exception); diff --git a/src/test/resources/network/brightspots/rcv/test_data/dominion_multi_file/dominion_multi_file_input_data/CvrExport_0.json b/src/test/resources/network/brightspots/rcv/test_data/dominion_multi_file/dominion_multi_file_input_data/CvrExport_6.json similarity index 100% rename from src/test/resources/network/brightspots/rcv/test_data/dominion_multi_file/dominion_multi_file_input_data/CvrExport_0.json rename to src/test/resources/network/brightspots/rcv/test_data/dominion_multi_file/dominion_multi_file_input_data/CvrExport_6.json