From b8d268a1a2a11c22739ce8a5a8aa2457f0d17711 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 29 May 2020 10:50:09 -0400 Subject: [PATCH 01/57] The (very limited) changes that went into the application to accommodate the external "custom download" service. Everything else is done by an outside standalone program (a java program with its own pom file). (#6505) --- .../iq/dataverse/FileDownloadServiceBean.java | 93 +++++++++++++++++-- .../settings/SettingsServiceBean.java | 8 +- 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 8e9695e086d..7a1cb04f776 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -6,6 +6,8 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.WorldMapPermissionHelper; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand; @@ -16,6 +18,7 @@ import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.IOException; import java.sql.Timestamp; @@ -23,6 +26,7 @@ import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.UUID; import java.util.logging.Logger; import javax.ejb.EJB; import javax.ejb.Stateless; @@ -68,6 +72,8 @@ public class FileDownloadServiceBean implements java.io.Serializable { AuthenticationServiceBean authService; @EJB PrivateUrlServiceBean privateUrlService; + @EJB + SettingsServiceBean settingsService; @Inject DataverseSession session; @@ -119,21 +125,47 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo } // OK, this is a real batch (multi-file) download. + + String customZipDownloadUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); + boolean useCustomZipService = customZipDownloadUrl != null; + String zipServiceKey = null; + // Do we need to write GuestbookRecord entries for the files? 
- if (!doNotSaveGuestbookRecord) { + if (!doNotSaveGuestbookRecord || useCustomZipService) { List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); - + Timestamp timestamp = null; + for (String idAsString : list) { - DataFile df = datafileService.findCheapAndEasy(new Long(idAsString)); + //DataFile df = datafileService.findCheapAndEasy(new Long(idAsString)); + DataFile df = datafileService.find(new Long(idAsString)); if (df != null) { - guestbookResponse.setDataFile(df); - writeGuestbookResponseRecord(guestbookResponse); + if (!doNotSaveGuestbookRecord) { + guestbookResponse.setDataFile(df); + writeGuestbookResponseRecord(guestbookResponse); + } + + if (useCustomZipService) { + if (zipServiceKey == null) { + zipServiceKey = generateServiceKey(); + } + if (timestamp == null) { + timestamp = new Timestamp(new Date().getTime()); + } + + //FileMetadata fm = datafileService.findFileMetadataByDatasetVersionIdAndDataFileId(guestbookResponse.getDatasetVersion().getId(), new Long(idAsString)); + addFileToCustomZipJob(zipServiceKey, df, timestamp); + } } } } - - redirectToBatchDownloadAPI(guestbookResponse.getSelectedFileIds(), "original".equals(guestbookResponse.getFileFormat())); + + if (useCustomZipService) { + redirectToCustomZipDownloadService(customZipDownloadUrl, zipServiceKey, "original".equals(guestbookResponse.getFileFormat())); + } else { + // Use the "normal" /api/access/datafiles/ API: + redirectToBatchDownloadAPI(guestbookResponse.getSelectedFileIds(), "original".equals(guestbookResponse.getFileFormat())); + } } public void writeGuestbookAndStartFileDownload(GuestbookResponse guestbookResponse, FileMetadata fileMetadata, String format) { @@ -219,6 +251,17 @@ private void redirectToBatchDownloadAPI(String multiFileString, Boolean guestboo } } + + private void redirectToCustomZipDownloadService(String customZipServiceUrl, String jobKey, Boolean downloadOriginal) { + + customZipServiceUrl += "?" 
+ jobKey; + + try { + FacesContext.getCurrentInstance().getExternalContext().redirect(customZipServiceUrl); + } catch (IOException ex) { + logger.info("Failed to issue a redirect to the custom Zip download service."); + } + } private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) { String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); @@ -490,6 +533,40 @@ public void sendRequestFileAccessNotification(Dataset dataset, Long fileId, Auth userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.REQUESTFILEACCESS, fileId, null, requestor, false); }); - } + } + + public String generateServiceKey() { + UUID uid = UUID.randomUUID(); + // last 8 bytes, of the random UUID, 16 hex digits: + return uid.toString().substring(20); + } + + public void addFileToCustomZipJob(String key, DataFile dataFile, Timestamp timestamp) { + String location = null; + String fileName = null; + + try { + StorageIO storageIO = DataAccess.getStorageIO(dataFile); + location = storageIO.getStorageLocation(); + } catch (IOException ioex) { + logger.info("Failed to open StorageIO for datafile " + dataFile.getId()); + } + + if (dataFile.getFileMetadata() != null) { + fileName = dataFile.getFileMetadata().getLabel(); + } + + if (location != null && fileName != null) { + em.createNativeQuery("INSERT INTO CUSTOMZIPSERVICEREQUEST (KEY, STORAGELOCATION, FILENAME, ISSUETIME) VALUES (" + + "'" + key + "'," + + "'" + location + "'," + + "'" + fileName + "'," + + "'" + timestamp + "');").executeUpdate(); + } + + // TODO: + // While we are here, issue another query, to delete all the entries that are + // more than 5 minutes (or so?) old + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index b4049c9b1cf..26baaaaaab9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -426,7 +426,13 @@ Whether Harvesting (OAI) service is enabled /** * Validate physical files for all the datafiles in the dataset when publishing */ - FileValidationOnPublishEnabled + FileValidationOnPublishEnabled, + /** + * If defined, this is the URL of the zipping service outside + * the main Application Service where zip downloads should be directed + * instead of /api/access/datafiles/ + */ + CustomZipDownloadServiceUrl ; @Override From 8b1765a5ec6cfd30412db7486ea23955d112c7da Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 29 May 2020 19:33:26 -0400 Subject: [PATCH 02/57] components of the standalone zipper (#6505). still working on the documentation, so will need to check it in later. 
--- scripts/zipdownload/README.md | 14 ++ scripts/zipdownload/cgi-bin/zipdownload | 11 + scripts/zipdownload/pom.xml | 107 ++++++++++ .../download/ChunkingOutputStream.java | 106 ++++++++++ .../service/download/ZipDownloadService.java | 194 ++++++++++++++++++ .../service/util/DatabaseAccessUtil.java | 132 ++++++++++++ .../custom/service/util/DirectAccessUtil.java | 111 ++++++++++ .../V4.20.0.4__6505-zipdownload-jobs.sql | 2 + 8 files changed, 677 insertions(+) create mode 100644 scripts/zipdownload/README.md create mode 100644 scripts/zipdownload/cgi-bin/zipdownload create mode 100644 scripts/zipdownload/pom.xml create mode 100644 scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ChunkingOutputStream.java create mode 100644 scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java create mode 100644 scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java create mode 100644 scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DirectAccessUtil.java create mode 100644 src/main/resources/db/migration/V4.20.0.4__6505-zipdownload-jobs.sql diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md new file mode 100644 index 00000000000..3b576bdd58f --- /dev/null +++ b/scripts/zipdownload/README.md @@ -0,0 +1,14 @@ +Work in progress! + +to build: + +clean compile assembly:single + +to install: + +install cgi-bin/zipdownload and ZipDownloadService-v1.0.0.jar in your cgi-bin directory (/var/www/cgi-bin standard). +Edit the config lines in the shell script (zipdownload) as needed. + +to activate in Dataverse: + +curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl diff --git a/scripts/zipdownload/cgi-bin/zipdownload b/scripts/zipdownload/cgi-bin/zipdownload new file mode 100644 index 00000000000..c2a9db8ca9b --- /dev/null +++ b/scripts/zipdownload/cgi-bin/zipdownload @@ -0,0 +1,11 @@ +#!/bin/sh + +CLASSPATH=/var/www/cgi-bin; export CLASSPATH + +PGHOST="localhost"; export PGHOST +PGPORT=5432; export PGPORT +PGUSER="dvnapp"; export PGUSER +PGDB="dvndb"; export PGDB +PGPW="xxxxx"; export PGPW + +java -Ddb.serverName=$PGHOST -Ddb.portNumber=$PGPORT -Ddb.user=$PGUSER -Ddb.databaseName=$PGDB -Ddb.password=$PGPW -jar ZipDownloadService-v1.0.0.jar \ No newline at end of file diff --git a/scripts/zipdownload/pom.xml b/scripts/zipdownload/pom.xml new file mode 100644 index 00000000000..60a5a5814ab --- /dev/null +++ b/scripts/zipdownload/pom.xml @@ -0,0 +1,107 @@ + + + 4.0.0 + ZipDownloadService + ZipDownloadService + 1.0.0 + + UTF-8 + + + + central + Central Repository + https://repo.maven.apache.org/maven2 + default + + false + + + never + + + + + + central-repo + Central Repository + https://repo1.maven.org/maven2 + default + + + prime-repo + PrimeFaces Maven Repository + https://repository.primefaces.org + default + + + dataone.org + https://maven.dataone.org + + true + + + true + + + + dvn.private + Local repository for hosting jars not available from network repositories. 
+ file://${project.basedir}/local_lib + + + + + + com.amazonaws + aws-java-sdk-bom + 1.11.790 + pom + import + + + + + + + org.postgresql + postgresql + 42.2.2 + + + com.amazonaws + aws-java-sdk-s3 + + + + src/main/java + + + maven-compiler-plugin + 3.1 + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.4 + + + + edu.harvard.iq.dataverse.custom.service.download.ZipDownloadService + + + + jar-with-dependencies + + ${project.artifactId}-v${project.version} + false + + + + + diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ChunkingOutputStream.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ChunkingOutputStream.java new file mode 100644 index 00000000000..296847ad834 --- /dev/null +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ChunkingOutputStream.java @@ -0,0 +1,106 @@ +/* + Copyright (C) 2005-2012, by the President and Fellows of Harvard College. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Dataverse Network - A web application to share, preserve and analyze research data. + Developed at the Institute for Quantitative Social Science, Harvard University. + Version 3.0. +*/ +package edu.harvard.iq.dataverse.custom.service.download; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Simple implementation of HTTP "chunking encoding" + * + * @author Leonid Andreev + */ +public class ChunkingOutputStream extends FilterOutputStream { + private static final int BUFFER_SIZE = 8192; + private static final byte[] CHUNK_CLOSE = "\r\n".getBytes(); + private static final String CHUNK_SIZE_FORMAT = "%x\r\n"; + + private byte[] buffer = new byte[BUFFER_SIZE]; + private int position = 0; + + public ChunkingOutputStream(OutputStream out) { + super(out); + } + + @Override + public void write(byte[] data) throws IOException { + this.write(data, 0, data.length); + } + + @Override + public void write(byte[] data, int offset, int length) throws IOException { + + // is this going to fill the buffer? + if (position + length > BUFFER_SIZE) { + System.arraycopy(data, offset, buffer, position, BUFFER_SIZE - position); + offset += (BUFFER_SIZE - position); + length -= (BUFFER_SIZE - position); + dumpChunk(buffer, 0, BUFFER_SIZE); + position = 0; + } + + // are there still multiple buffer-worths of bytes? + while (length > BUFFER_SIZE) { + dumpChunk(data, offset, BUFFER_SIZE); + offset += BUFFER_SIZE; + length -= BUFFER_SIZE; + } + + // finally, buffer the leftover bytes: + System.arraycopy(data, offset, buffer, position, length); + position+=length; + + } + + @Override + public void write(int i) throws IOException { + // Hopefully ZipOutputStream never writes single bytes into the stream? + // Uh, actually it does, *a lot* - at the beginning of the archive, and + // when it closes it. 
+ + if (position == BUFFER_SIZE) { + dumpChunk(buffer, 0, position); + position = 0; + } + buffer[position++] = (byte)i; + } + + @Override + public void close() throws IOException { + if (position > 0) { + dumpChunk(buffer, 0, position); + } + + // ... and the final, "zero chunk": + super.out.write('0'); + super.out.write(CHUNK_CLOSE); + super.out.write(CHUNK_CLOSE); + + super.out.close(); + } + + + private void dumpChunk(byte[] data, int offset, int length) throws IOException { + String chunkSizeLine = String.format(CHUNK_SIZE_FORMAT, length); + super.out.write(chunkSizeLine.getBytes()); + super.out.write(data, offset, length); + } +} diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java new file mode 100644 index 00000000000..0669aa7919f --- /dev/null +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java @@ -0,0 +1,194 @@ +/* + Copyright (C) 2005-2012, by the President and Fellows of Harvard College. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Dataverse Network - A web application to share, preserve and analyze research data. + Developed at the Institute for Quantitative Social Science, Harvard University. + Version 3.0. +*/ +package edu.harvard.iq.dataverse.custom.service.download; + +import edu.harvard.iq.dataverse.custom.service.util.DirectAccessUtil; +import static edu.harvard.iq.dataverse.custom.service.util.DatabaseAccessUtil.lookupZipJob; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +/** + * Custom (standalone) download service for Dataverse + * + * @author Leonid Andreev + */ +public class ZipDownloadService { + + private static String jobKey = null; + private List jobFiles = null; + private boolean zipOnly = false; + + private DirectAccessUtil directAccessUtil = null; + private ZipOutputStream zipOutputStream = null; + + public static void main(String args[]) throws Exception { + + ZipDownloadService zipperService = new ZipDownloadService(); + + if (!zipperService.parseArgs(args)) { + zipperService.usage(); + return; + } + + zipperService.parseCgiQueryParameters(); + + zipperService.execute(jobKey); + } + + private static void usage() { + System.out.println("\nUsage:"); + System.out.println(" java -jar ZipDownloadService-1.0.0.jar [-ziponly]>\n"); + + System.out.println(" supported options:"); + System.out.println(" -ziponly = output zip only, no http header/no chunking"); + System.out.println(""); + + } + + // The only option supported at the moment is "zip only" - output just the + // compressed stream, skip the HTTP header and chunking. 
+ public boolean parseArgs(String[] args) { + + if (args == null || args.length == 0) { + return true; + } else if (args.length == 1) { + if (args[0].equals("-ziponly")) { + this.zipOnly = true; + return true; + } + } + + return false; + } + + // Does not support any parameters, except the job-identifying token key, + // supplied as the entire query string. + public void parseCgiQueryParameters() { + String queryString = System.getenv().get("QUERY_STRING"); + if (queryString != null) { + jobKey = queryString; + } + } + + public void print404() { + System.out.println("Status: 404 Not Found\r"); + System.out.println("Content-Type: text/html\r"); + System.out.println("\r"); + + System.out.println("

<html><body><h1>404 No such download job!</h1></body></html>
"); + } + + public void printZipHeader() { + System.out.println("Content-disposition: attachment; filename=\"dataverse_files.zip\"\r"); + System.out.println("Content-Type: application/zip; name=\"dataverse_files.zip\"\r"); + System.out.println("Transfer-Encoding: chunked\r"); + System.out.println("\r"); + System.out.flush(); + } + + public void execute(String key) { + + jobFiles = lookupZipJob(key); + + if (jobFiles == null || jobFiles.size() == 0) { + this.print404(); + System.exit(0); + } + + this.processFiles(); + } + + public void processFiles() { + + if (!this.zipOnly) { + this.printZipHeader(); + } + + Set zippedFolders = new HashSet<>(); + + for (String [] fileEntry : jobFiles) { + String storageLocation = fileEntry[0]; + String fileName = fileEntry[1]; + + //System.out.println(storageLocation + ":" + fileName); + + if (this.zipOutputStream == null) { + openZipStream(); + } + + if (this.directAccessUtil == null) { + this.directAccessUtil = new DirectAccessUtil(); + } + + InputStream inputStream = this.directAccessUtil.openDirectAccess(storageLocation); + + // TODO: folders + // TODO: String zipEntryName = checkZipEntryName(fileName); + if (inputStream != null && this.zipOutputStream != null) { + ZipEntry entry = new ZipEntry(fileName); + + byte[] bytes = new byte[2 * 8192]; + int read = 0; + long readSize = 0L; + + try { + this.zipOutputStream.putNextEntry(entry); + + while ((read = inputStream.read(bytes)) != -1) { + this.zipOutputStream.write(bytes, 0, read); + readSize += read; + } + inputStream.close(); + this.zipOutputStream.closeEntry(); + + /*if (fileSize == readSize) { + //System.out.println("Read "+readSize+" bytes;"); + } else { + throw new IOException("Byte size mismatch: expected " + fileSize + ", read: " + readSize); + }*/ + } catch (IOException ioex) { + System.err.println("Faile to compress "+storageLocation); + } + } else { + System.err.println("Failed to access "+storageLocation); + } + + } + try { + this.zipOutputStream.flush(); + this.zipOutputStream.close(); + + System.out.flush(); + System.out.close(); + } catch (Exception e) { + } + } + + public void openZipStream() { + if (this.zipOutputStream == null) { + this.zipOutputStream = new ZipOutputStream(new ChunkingOutputStream(System.out)); + } + } +} diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java new file mode 100644 index 00000000000..5ae01d985e4 --- /dev/null +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java @@ -0,0 +1,132 @@ +/* + Copyright (C) 2005-2012, by the President and Fellows of Harvard College. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Dataverse Network - A web application to share, preserve and analyze research data. + Developed at the Institute for Quantitative Social Science, Harvard University. + Version 3.0. 
+*/ + +package edu.harvard.iq.dataverse.custom.service.util; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility methods for directly accessing the Dataverse database to extract + * the file locations and parameters for the zipping jobs. + * + * @author Leonid Andreev + */ +public class DatabaseAccessUtil implements java.io.Serializable { + + // The zipper needs to make one database call to initiate each job. + // So the database connection can be closed immediately. + + public static List lookupZipJob(String jobKey) { + Connection c = connectToDatabase(); + + if (c == null) { + // no connection - no data, return null queitly + return null; + } + + Statement stmt; + ResultSet rs; + + List ret = new ArrayList<>(); + + try { + c.setAutoCommit(false); + + stmt = c.createStatement(); + rs = stmt.executeQuery( "SELECT * FROM CustomZipServiceRequest WHERE key='" + jobKey +"';" ); + + while ( rs.next() ) { + String storageLocation = rs.getString("storageLocation"); + String fileName = rs.getString("fileName"); + + //System.out.println( "storageLocation = " + storageLocation ); + //System.out.println( "fileName = " + fileName ); + + String[] entry = new String[2]; + entry[0] = storageLocation; + entry[1] = fileName; + + ret.add(entry); + } + rs.close(); + stmt.close(); + } catch (Exception e) { + System.err.println( "Database error: " + e.getClass().getName()+" "+ e.getMessage() ); + // return null (but close the connection first): + try { + c.close(); + } catch (Exception ex) {} + return null; + } + + // Delete all the entries associated with the job, now that we are done + // with it. + // Alternatively, the db user whose credentials the zipper is using + // may be given only read-only access to the table; and it could be the + // job of the Dataverse application to, say, automatically delete all the + // entries older than 5 min. every time it accesses the table on its side. + + /*try { + stmt = c.createStatement(); + stmt.executeUpdate("DELETE FROM CustomZipServiceRequest WHERE key='" + jobKey +"';"); + c.commit(); + } catch (Exception e) { + // Not much we can or want to do, but complain in the Apache logs: + System.err.println("Failed to delete the job from the db"); + }*/ + + try { + c.close(); + } catch (Exception e) {} + + return ret; + } + + // Opens the connection to the database. + // Uses the credentials supplied via JVM options + private static Connection connectToDatabase() { + Connection c = null; + + String host = System.getProperty("db.serverName") != null ? System.getProperty("db.serverName") : "localhost"; + String port = System.getProperty("db.portNumber") != null ? System.getProperty("db.portNumber") : "5432"; + String database = System.getProperty("db.databaseName") != null ? System.getProperty("db.databaseName") : "dvndb"; + String pguser = System.getProperty("db.user") != null ? System.getProperty("db.user") : "dvnapp"; + String pgpasswd = System.getProperty("db.password") != null ? 
System.getProperty("db.password") : "secret"; + + try { + Class.forName("org.postgresql.Driver"); + c = DriverManager + .getConnection("jdbc:postgresql://" + host + ":" + port + "/" + database, + pguser, + pgpasswd); + } catch (Exception e) { + //e.printStackTrace(); + //System.err.println(e.getClass().getName()+": "+e.getMessage()); + return null; + } + //System.out.println("Opened database successfully"); + return c; + } +} \ No newline at end of file diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DirectAccessUtil.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DirectAccessUtil.java new file mode 100644 index 00000000000..699c431dcf1 --- /dev/null +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DirectAccessUtil.java @@ -0,0 +1,111 @@ +/* + Copyright (C) 2005-2012, by the President and Fellows of Harvard College. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Dataverse Network - A web application to share, preserve and analyze research data. + Developed at the Institute for Quantitative Social Science, Harvard University. + Version 3.0. +*/ + +package edu.harvard.iq.dataverse.custom.service.util; + +import com.amazonaws.SdkClientException; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Utility methods for directly accessing storage locations + * Supports file system and S3. + * (S3 has only been tested with AWS; non-standard auth may not be supported yet) + * + * @author Leonid Andreev + */ +public class DirectAccessUtil implements java.io.Serializable { + + private AmazonS3 s3 = null; + + public InputStream openDirectAccess(String storageLocation) { + InputStream inputStream = null; + + if (storageLocation.startsWith("s3://")) { + createOrReuseAwsClient(); + + if (this.s3 == null) { + return null; + } + + storageLocation = storageLocation.substring(5); + + String bucket = storageLocation.substring(0, storageLocation.indexOf('/')); + String key = storageLocation.substring(storageLocation.indexOf('/') + 1); + + //System.out.println("bucket: "+bucket); + //System.out.println("key: "+key); + + /* commented-out code below is for looking up S3 metatadata + properties, such as size, etc. 
prior to making an access call: + ObjectMetadata objectMetadata = null; + long fileSize = 0L; + try { + objectMetadata = s3.getObjectMetadata(bucket, key); + fileSize = objectMetadata.getContentLength(); + //System.out.println("byte size: "+objectMetadata.getContentLength()); + } catch (SdkClientException sce) { + System.err.println("Cannot get S3 object metadata " + key + " from bucket " + bucket); + }*/ + + try { + inputStream = s3.getObject(new GetObjectRequest(bucket, key)).getObjectContent(); + } catch (SdkClientException sce) { + System.err.println("Cannot get S3 object " + key + " from bucket " + bucket); + } + + } else if (storageLocation.startsWith("file://")) { + // This could be a static method; since no reusable client/maintainable + // state is required + + storageLocation = storageLocation.substring(7); + + try { + inputStream = new FileInputStream(new File(storageLocation)); + } catch (IOException ioex) { + System.err.println("Cannot open file " + storageLocation); + } + } + + // Unsupported storage location - return null + return inputStream; + } + + private void createOrReuseAwsClient() { + if (this.s3 == null) { + try { + AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + s3CB.setCredentials(new ProfileCredentialsProvider("default")); + this.s3 = s3CB.build(); + + } catch (Exception e) { + System.err.println("cannot instantiate an S3 client"); + } + } + } + +} \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.4__6505-zipdownload-jobs.sql b/src/main/resources/db/migration/V4.20.0.4__6505-zipdownload-jobs.sql new file mode 100644 index 00000000000..484d5dd0784 --- /dev/null +++ b/src/main/resources/db/migration/V4.20.0.4__6505-zipdownload-jobs.sql @@ -0,0 +1,2 @@ +-- maybe temporary? - work in progress +CREATE TABLE IF NOT EXISTS CUSTOMZIPSERVICEREQUEST (KEY VARCHAR(63), STORAGELOCATION VARCHAR(255), FILENAME VARCHAR(255), ISSUETIME TIMESTAMP); From e3973d1a90a31295a868223da52e21a5d99290e1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 2 Jun 2020 09:42:03 -0400 Subject: [PATCH 03/57] handling of folders added to the zipper; added some info to the documentation explaining how the zipper does its thing. (#6505) --- scripts/zipdownload/README.md | 80 +++++++++++++++++++ .../service/download/ZipDownloadService.java | 48 ++++++++++- 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 3b576bdd58f..c1c6ec0e785 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -12,3 +12,83 @@ Edit the config lines in the shell script (zipdownload) as needed. to activate in Dataverse: curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl + +How it works: +============= + +The goal: to move this potentially long-running task out of the +Application Server. This is the sole focus of this implementation. It +does not attempt to make it faster. + +The rationale here is a zipped download of a large enough number of +large enough files will always be slow. Zipping (compressing) itself +is a fairly CPU-intensive task. This will most frequently be the +bottleneck of the service. Although with a slow storage location (S3 +or Swift, with a slow link to the share) it may be the speed at which +the application accesses the raw bytes. The exact location of the +bottleneck is in a sense irrelevant. 
On a very fast system, with the +files stored on a very fast local RAID, the bottleneck for most users +will likely shift to the speed of their internet connection to the +Dataverse. Bottom line is, downloading this multi-file compressed +stream will take a long time no matter how you slice it. So this hack +addresses it by moving the task outside Payara, where it's not going +to hog any threads. + +A quick, somewhat unrelated note: attempting to download a multi-GB +stream over http will always have its own inherent risks. If the +download has to take hours or days to complete, it is very likely that +it'll break down somewhere in the middle. Do note that for a zipped +download our users will not be able to utilize `wget --continue`, or +any similar "resume" functionality - because it's impossible to resume +generating a zipped stream from a certain offset. + +The implementation is a hack. It relies on direct access to everything +- storage locations (filesystem or S3) and the database. + +There are no network calls between the Application and the zipper (an +implementation relying on such a call was discussed early +on). Dataverse issues a "job key" and sends the user's browser to the +zipper (to, for ex., /cgi-bin/zipdownload?) instead of +/api/access/datafiles/). To authorize the zipdownload for +the "job key", and inform the zipper on which files to zip and where +to find them, the application relies on a database table, that the +zipper also has access too. In other words, there is a saved state +information associated with each zipped download request. Zipper may +be given a limited database access - for example, via a user +authorized to access that one table only. After serving the files, the +zipper removes the database entries. Job records in the database have +time stamps, so on the application side, as an added level of cleanup, +it automatically deletes any records older than 5 minutes (can be +further reduced) every time the service adds new records; as an added +level of cleanup for any records that got stuck in the db because the +corresponding zipper jobs never completed. A paranoid admin may choose +to give the zipper read-only access to the database, and rely on a +cleanup solely on the application side. + +I have explored ways to avoid maintaining this state information. A +potential implementation we discussed early on, where the application +would make a network call to the zipper before redirecting the user +there, would NOT solve that problem - the state would need to somehow +be maintained on the zipper side. The only truly stateless +implementation would rely on including all the file information WITH +the redirect itself, with some pre-signed URL mechanism to make it +secure. Mechanisms for pre-signing requests are readily available and +simple to implement. We could go with something similar to how S3 +presigns their access URLs. Jim Meyers has already speced out how this +could be done for Dataverse access urls in a design document +(https://docs.google.com/document/d/1J8GW6zi-vSRKZdtFjLpmYJ2SUIcIkAEwHkP4q1fxL-s/edit#). (Basically, +you hash the product of your request parameters, the issue timestamp +AND some "secret" - like the user's API key - and send the resulting +hash along with the request. Tempering with any of the parameters, or +trying to extend the life span of the request, becomes impossible, +because it would invalidate the hash). 
What stopped me from trying +something like that was the sheer size of information that would need +to be included with a request, for a potentially long list of files +that need to be zipped. When serving a zipped download from a page +that would be doable - we could javascript together a POST call that +the browser could make to send all that info to the zipper. But if we +want to implement something similar in the API, I felt like I really +wanted to be able to simply issue a quick redirect to a manageable url +- which with the implementation above is simply +/cgi-bin/zipdownload?, with the being just a 16 +character hex string in the current implementation. diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java index 0669aa7919f..3e2f35dc75d 100644 --- a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java @@ -147,6 +147,7 @@ public void processFiles() { // TODO: folders // TODO: String zipEntryName = checkZipEntryName(fileName); if (inputStream != null && this.zipOutputStream != null) { + ZipEntry entry = new ZipEntry(fileName); byte[] bytes = new byte[2 * 8192]; @@ -154,6 +155,11 @@ public void processFiles() { long readSize = 0L; try { + // Does this file have a folder name? + if (hasFolder(fileName)) { + addFolderToZipStream(getFolderName(fileName), zippedFolders); + } + this.zipOutputStream.putNextEntry(entry); while ((read = inputStream.read(bytes)) != -1) { @@ -162,7 +168,7 @@ public void processFiles() { } inputStream.close(); this.zipOutputStream.closeEntry(); - + /*if (fileSize == readSize) { //System.out.println("Read "+readSize+" bytes;"); } else { @@ -188,7 +194,45 @@ public void processFiles() { public void openZipStream() { if (this.zipOutputStream == null) { - this.zipOutputStream = new ZipOutputStream(new ChunkingOutputStream(System.out)); + if (this.zipOnly) { + this.zipOutputStream = new ZipOutputStream(System.out); + } else { + this.zipOutputStream = new ZipOutputStream(new ChunkingOutputStream(System.out)); + } + } + } + + private boolean hasFolder(String fileName) { + if (fileName == null) { + return false; + } + return fileName.indexOf('/') >= 0; + } + + private String getFolderName(String fileName) { + if (fileName == null) { + return ""; + } + String folderName = fileName.substring(0, fileName.lastIndexOf('/')); + // If any of the saved folder names start with with slashes, + // we want to remove them: + // (i.e., ///foo/bar will become foo/bar) + while (folderName.startsWith("/")) { + folderName = folderName.substring(1); + } + return folderName; + } + + private void addFolderToZipStream(String folderName, Set zippedFolders) throws IOException { + // We don't want to create folders in the output Zip file that have + // already been added: + if (!"".equals(folderName)) { + if (!zippedFolders.contains(folderName)) { + ZipEntry d = new ZipEntry(folderName + "/"); + zipOutputStream.putNextEntry(d); + zipOutputStream.closeEntry(); + zippedFolders.add(folderName); + } } } } From ad1787a4b22cbcf5d7ab0f25a9754a2a8fbdb753 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 2 Jun 2020 12:46:19 -0400 Subject: [PATCH 04/57] cosmetic (#6505) --- scripts/zipdownload/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index c1c6ec0e785..fea25e9792c 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -42,8 +42,7 @@ download our users will not be able to utilize `wget --continue`, or any similar "resume" functionality - because it's impossible to resume generating a zipped stream from a certain offset. -The implementation is a hack. It relies on direct access to everything -- storage locations (filesystem or S3) and the database. +The implementation is a hack. It relies on direct access to everything - storage locations (filesystem or S3) and the database. There are no network calls between the Application and the zipper (an implementation relying on such a call was discussed early From 1dc597b0fd0d5a83bacd7220619a97fec9bdc9cf Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 16 Jun 2020 09:05:06 -0400 Subject: [PATCH 05/57] The modifications allowing the use of the "custom zipper" with the API as well.(#6505) --- .../iq/dataverse/FileDownloadServiceBean.java | 27 +++-- .../edu/harvard/iq/dataverse/api/Access.java | 104 ++++++++++++++++-- 2 files changed, 116 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 7a1cb04f776..4bf6704f44d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -35,6 +35,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; +import javax.persistence.Query; import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletResponse; @@ -96,6 +97,7 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo } public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookResponse, Boolean doNotSaveGuestbookRecord){ + boolean original = true; if (guestbookResponse == null || guestbookResponse.getSelectedFileIds() == null) { return; } @@ -153,15 +155,14 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo timestamp = new Timestamp(new Date().getTime()); } - //FileMetadata fm = datafileService.findFileMetadataByDatasetVersionIdAndDataFileId(guestbookResponse.getDatasetVersion().getId(), new Long(idAsString)); - addFileToCustomZipJob(zipServiceKey, df, timestamp); + addFileToCustomZipJob(zipServiceKey, df, timestamp, original); } } } } if (useCustomZipService) { - redirectToCustomZipDownloadService(customZipDownloadUrl, zipServiceKey, "original".equals(guestbookResponse.getFileFormat())); + redirectToCustomZipDownloadService(customZipDownloadUrl, zipServiceKey); } else { // Use the "normal" /api/access/datafiles/ API: redirectToBatchDownloadAPI(guestbookResponse.getSelectedFileIds(), "original".equals(guestbookResponse.getFileFormat())); @@ -252,7 +253,7 @@ private void redirectToBatchDownloadAPI(String multiFileString, Boolean guestboo } - private void redirectToCustomZipDownloadService(String customZipServiceUrl, String jobKey, Boolean downloadOriginal) { + private void redirectToCustomZipDownloadService(String customZipServiceUrl, String jobKey) { customZipServiceUrl += "?" 
+ jobKey; @@ -541,19 +542,26 @@ public String generateServiceKey() { return uid.toString().substring(20); } - public void addFileToCustomZipJob(String key, DataFile dataFile, Timestamp timestamp) { + public void addFileToCustomZipJob(String key, DataFile dataFile, Timestamp timestamp, boolean orig) { String location = null; String fileName = null; try { StorageIO storageIO = DataAccess.getStorageIO(dataFile); location = storageIO.getStorageLocation(); + if (orig && dataFile.isTabularData()) { + location = location.concat(".orig"); + } } catch (IOException ioex) { logger.info("Failed to open StorageIO for datafile " + dataFile.getId()); } if (dataFile.getFileMetadata() != null) { - fileName = dataFile.getFileMetadata().getLabel(); + if (orig && dataFile.isTabularData()) { + fileName = dataFile.getOriginalFileName(); + } else { + fileName = dataFile.getFileMetadata().getLabel(); + } } if (location != null && fileName != null) { @@ -564,9 +572,12 @@ public void addFileToCustomZipJob(String key, DataFile dataFile, Timestamp times + "'" + timestamp + "');").executeUpdate(); } - // TODO: + // TODO: // While we are here, issue another query, to delete all the entries that are - // more than 5 minutes (or so?) old + // more than N seconds old? + Timestamp deleteTime = new Timestamp(new Date().getTime() - 300000L); + em.createNativeQuery("DELETE FROM CUSTOMZIPSERVICEREQUEST WHERE ISSUETIME < " + + "'" + deleteTime + "';").executeUpdate(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 42328367241..6a36fb0b75d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -20,6 +20,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DataverseTheme; +import edu.harvard.iq.dataverse.FileDownloadServiceBean; import edu.harvard.iq.dataverse.GuestbookResponse; import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; import edu.harvard.iq.dataverse.PermissionServiceBean; @@ -82,6 +83,7 @@ import javax.json.Json; import javax.json.JsonObjectBuilder; import java.math.BigDecimal; +import java.net.URI; import java.util.HashSet; import java.util.Set; import java.util.function.Consumer; @@ -116,6 +118,8 @@ import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import java.net.URISyntaxException; +import javax.ws.rs.RedirectionException; /* Custom API exceptions [NOT YET IMPLEMENTED] @@ -169,6 +173,8 @@ public class Access extends AbstractApiBean { DataverseRoleServiceBean roleService; @EJB UserNotificationServiceBean userNotificationService; + @EJB + FileDownloadServiceBean fileDownloadService; @Inject PermissionsWrapper permissionsWrapper; @Inject @@ -533,6 +539,11 @@ public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") @GET @Produces({"application/zip"}) public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + String customZipServiceUrl = 
settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); + boolean useCustomZipService = customZipServiceUrl != null; + if (fileIds == null || fileIds.equals("")) { + throw new BadRequestException(); + } long setLimit = systemConfig.getZipDownloadLimit(); if (!(setLimit > 0L)) { @@ -542,10 +553,6 @@ public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gb final long zipDownloadSizeLimit = setLimit; //to use via anon inner class logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); - - if (fileIds == null || fileIds.equals("")) { - throw new BadRequestException(); - } String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) ? headers.getHeaderString(API_KEY_HEADER) @@ -560,6 +567,24 @@ public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gb getOrig = true; } } + + if (useCustomZipService) { + URI redirect_uri = null; + try { + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, gbrecs, true); + } catch (WebApplicationException wae) { + throw wae; + } + + Response redirect = Response.seeOther(redirect_uri).build(); + logger.fine("Issuing redirect to the file location on S3."); + throw new RedirectionException(redirect); + + } + + // Not using the "custom service" - API will zip the file, + // and stream the output, in the "normal" manner: + final boolean getOriginal = getOrig; //to use via anon inner class StreamingOutput stream = new StreamingOutput() { @@ -1640,9 +1665,74 @@ private User findAPITokenUser(String apiToken) { return apiTokenUser; } + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean gbrecs, boolean orig) throws WebApplicationException { + String zipServiceKey = null; + Timestamp timestamp = null; + + String fileIdParams[] = fileIds.split(","); + int validIdCount = 0; + int validFileCount = 0; + int downloadAuthCount = 0; + if (fileIdParams == null || fileIdParams.length == 0) { + throw new BadRequestException(); + } + + for (int i = 0; i < fileIdParams.length; i++) { + Long fileId = null; + try { + fileId = new Long(fileIdParams[i]); + validIdCount++; + } catch (NumberFormatException nfe) { + fileId = null; + } + if (fileId != null) { + DataFile file = dataFileService.find(fileId); + if (file != null) { + validFileCount++; + if (isAccessAuthorized(file, apiToken)) { + logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); + if (gbrecs != true && file.isReleased()) { + GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); + guestbookResponseService.save(gbr); + MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, file); + mdcLogService.logEntry(entry); + } - - - + if (zipServiceKey == null) { + zipServiceKey = fileDownloadService.generateServiceKey(); + } + if (timestamp == null) { + timestamp = new Timestamp(new Date().getTime()); + } + + fileDownloadService.addFileToCustomZipJob(zipServiceKey, file, timestamp, true); + downloadAuthCount++; + } + } + } + } + + if (validIdCount == 0) { + throw new BadRequestException(); + } + + if (validFileCount == 0) { + // no supplied id translated into an existing DataFile + throw new NotFoundException(); + } + + if (downloadAuthCount == 0) { + // none of the DataFiles were authorized for download + 
throw new ForbiddenException(); + } + + URI redirectUri = null; + try { + redirectUri = new URI(customZipServiceUrl + "?" + zipServiceKey); + } catch (URISyntaxException use) { + throw new BadRequestException(); + } + return redirectUri; + } } From ddfc88c3ca221bccb7df2a0bdd454fc8d56040ea Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 22 Jun 2020 21:07:38 -0400 Subject: [PATCH 06/57] uncommented the line that cleans the request table, on the service executable side. (#6505) --- .../iq/dataverse/custom/service/util/DatabaseAccessUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java index 5ae01d985e4..423942877d7 100644 --- a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java @@ -88,14 +88,14 @@ public class DatabaseAccessUtil implements java.io.Serializable { // job of the Dataverse application to, say, automatically delete all the // entries older than 5 min. every time it accesses the table on its side. - /*try { + try { stmt = c.createStatement(); stmt.executeUpdate("DELETE FROM CustomZipServiceRequest WHERE key='" + jobKey +"';"); c.commit(); } catch (Exception e) { // Not much we can or want to do, but complain in the Apache logs: System.err.println("Failed to delete the job from the db"); - }*/ + } try { c.close(); From aa923ba34ff812091ace1fd56f3dc65822646838 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 22 Jun 2020 22:11:54 -0400 Subject: [PATCH 07/57] a release note for the "zipper tool". (#6505) --- doc/release-notes/6505-zipdownload-service.md | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 doc/release-notes/6505-zipdownload-service.md diff --git a/doc/release-notes/6505-zipdownload-service.md b/doc/release-notes/6505-zipdownload-service.md new file mode 100644 index 00000000000..c5084267c64 --- /dev/null +++ b/doc/release-notes/6505-zipdownload-service.md @@ -0,0 +1,24 @@ +### A multi-file, zipped download optimization + +In this release we are offering an experimental optimization for the +multi-file, download-as-zip functionality. If this option is enabled, +instead of enforcing size limits, we attempt to serve all the files +that the user requested (that they are authorized to download), but +the request is redirected to a standalone zipper service running as a +cgi executable. Thus moving these potentially long-running jobs +completely outside the Application Server (Payara); and preventing +service threads from becoming locked serving them. Since zipping is +also a CPU-intensive task, it is possible to have this service running +on a different host system, thus freeing the cycles on the main +Application Server. (The system running the service needs to have +access to the database as well as to the storage filesystem, and/or S3 +bucket). + +Please consult the scripts/zipdownload/README.md in the Dataverse 5 +source tree. + +The components of the standalone "zipper tool" can also be downloaded +here: +(my plan is to build the executable and to add it to the v5 +release files on github: - L.A.) +https://github.com/IQSS/dataverse/releases/download/v5.0/zipper.zip. 
\ No newline at end of file From 5d27982b1b4ebd7f56adc349138bd9de3fc22670 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 22 Jun 2020 22:34:24 -0400 Subject: [PATCH 08/57] added a section on the zipper service to the "installation/advanced" section (#6505) --- .../source/installation/advanced.rst | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 9bcba1a9fac..8d61678a715 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -35,3 +35,42 @@ If you have successfully installed multiple app servers behind a load balancer y You would repeat the steps above for all of your app servers. If users seem to be having a problem with a particular server, you can ask them to visit https://dataverse.example.edu/host.txt and let you know what they see there (e.g. "server1.example.edu") to help you know which server to troubleshoot. Please note that :ref:`network-ports` under the Configuration section has more information on fronting your app server with Apache. The :doc:`shibboleth` section talks about the use of ``ProxyPassMatch``. + +Optional Components +------------------- + +Standalone "Zipper" Service Tool +++++++++++++++++++++++++++++++++ + +As of Dataverse v5.0 we offer an experimental optimization for the +multi-file, download-as-zip functionality. If this option is enabled, +instead of enforcing size limits, we attempt to serve all the files +that the user requested (that they are authorized to download), but +the request is redirected to a standalone zipper service running as a +cgi-bin executable under Apache. Thus moving these potentially +long-running jobs completely outside the Application Server (Payara); +and preventing worker threads from becoming locked serving them. Since +zipping is also a CPU-intensive task, it is possible to have this +service running on a different host system, thus freeing the cycles on +the main Application Server. (The system running the service needs to +have access to the database as well as to the storage filesystem, +and/or S3 bucket). + +Please consult the scripts/zipdownload/README.md in the Dataverse 5 +source tree for more information. + +To install: follow the instructions in the file above to build +``ZipDownloadService-v1.0.0.jar``. Copy it, together with the shell +script scripts/zipdownload/cgi-bin/zipdownload to the cgi-bin +directory of the chosen Apache server (/var/www/cgi-bin standard). +Edit the config lines in the shell script (zipdownload) to configure +database access credentials. Do note that the executable does not need +access to the entire Dataverse database. A secuirity-conscious admin +can create a dedicated database user with access to just one table: +``CUSTOMZIPSERVICEREQUEST``. 
+ +to activate in Dataverse:: + + curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl + + From c99fa60c60993a5921ce261fd3a13babec56a4eb Mon Sep 17 00:00:00 2001 From: Danny Brooke Date: Wed, 24 Jun 2020 17:09:59 -0400 Subject: [PATCH 09/57] adding new setting to release notes --- doc/release-notes/6505-zipdownload-service.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6505-zipdownload-service.md b/doc/release-notes/6505-zipdownload-service.md index c5084267c64..a3a83b9e31a 100644 --- a/doc/release-notes/6505-zipdownload-service.md +++ b/doc/release-notes/6505-zipdownload-service.md @@ -21,4 +21,10 @@ The components of the standalone "zipper tool" can also be downloaded here: (my plan is to build the executable and to add it to the v5 release files on github: - L.A.) -https://github.com/IQSS/dataverse/releases/download/v5.0/zipper.zip. \ No newline at end of file +https://github.com/IQSS/dataverse/releases/download/v5.0/zipper.zip. + +## New JVM Options and DB Options + +### New DB Option CustomZipDownloadServiceUrl + +If defined, this is the URL of the zipping service outside the main Application Service where zip downloads should be directed (instead of /api/access/datafiles/) From c10b516d4b77f8169e35f1d27d8f95b7e8935b35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felker=20Tam=C3=A1s?= Date: Tue, 23 Jun 2020 13:34:14 +0200 Subject: [PATCH 10/57] Public ORCID login is available. --- .../authorization/providers/oauth2/impl/OrcidOAuth2AP.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java index be22e9bc332..24d13eedc29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java @@ -55,7 +55,8 @@ public class OrcidOAuth2AP extends AbstractOAuth2AuthenticationProvider { public static final String PROVIDER_ID_SANDBOX = "orcid-sandbox"; public OrcidOAuth2AP(String clientId, String clientSecret, String userEndpoint) { - scope = Arrays.asList("/read-limited"); + String s = userEndpoint.startsWith("https://pub") ? "/authenticate" : "/read-limited"; + scope = Arrays.asList(s); this.clientId = clientId; this.clientSecret = clientSecret; this.baseUserEndpoint = userEndpoint; From 8dfe4c448c87c63e4fa9449eea8de49ab6382441 Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 12:07:51 -0400 Subject: [PATCH 11/57] Update scripts/zipdownload/README.md Co-authored-by: Philip Durbin --- scripts/zipdownload/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index fea25e9792c..7ebfb05cf90 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -2,7 +2,7 @@ Work in progress! 
to build: -clean compile assembly:single +mvn clean compile assembly:single to install: From 46584da7cffb4cd017dae812aa085f2062caedce Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 12:09:50 -0400 Subject: [PATCH 12/57] Update scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java Co-authored-by: Philip Durbin --- .../dataverse/custom/service/download/ZipDownloadService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java index 3e2f35dc75d..7277c542cf1 100644 --- a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java @@ -175,7 +175,7 @@ public void processFiles() { throw new IOException("Byte size mismatch: expected " + fileSize + ", read: " + readSize); }*/ } catch (IOException ioex) { - System.err.println("Faile to compress "+storageLocation); + System.err.println("Failed to compress "+storageLocation); } } else { System.err.println("Failed to access "+storageLocation); From 5aaaff5664c2f610f654e941d0f5318dd1d385f1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 12:56:59 -0400 Subject: [PATCH 13/57] Better/safer handling of database queries (#6505) --- .../service/download/ZipDownloadService.java | 6 +- .../service/util/DatabaseAccessUtil.java | 58 ++++++++++++++----- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java index 3e2f35dc75d..8dc62cdb36f 100644 --- a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/download/ZipDownloadService.java @@ -144,8 +144,10 @@ public void processFiles() { InputStream inputStream = this.directAccessUtil.openDirectAccess(storageLocation); - // TODO: folders - // TODO: String zipEntryName = checkZipEntryName(fileName); + // (potential?) TODO: String zipEntryName = checkZipEntryName(fileName); + // this may not be needed anymore - some extra sanitizing of the file + // name we used to have to do - since all the values in a current Dataverse + // database may already be santized enough. 
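// Editor's note (hypothetical, not part of this patch): if that extra sanitizing
// ever turns out to be needed after all, a minimal checkZipEntryName() would only
// need to flatten path separators and control characters before the name is used
// as a ZipEntry, along these lines:
//
//     private String checkZipEntryName(String fileName) {
//         // keep just the last path component and replace non-printable characters
//         String name = fileName.replaceAll("^.*[\\\\/]", "");
//         return name.replaceAll("\\p{Cntrl}", "_");
//     }
//
// (The method name comes from the TODO above; its body here is only a sketch.)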
if (inputStream != null && this.zipOutputStream != null) { ZipEntry entry = new ZipEntry(fileName); diff --git a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java index 423942877d7..8f9c34fe0a1 100644 --- a/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java +++ b/scripts/zipdownload/src/main/java/edu/harvard/iq/dataverse/custom/service/util/DatabaseAccessUtil.java @@ -22,6 +22,7 @@ import java.sql.Connection; import java.sql.DriverManager; +import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.Statement; import java.util.ArrayList; @@ -37,8 +38,26 @@ public class DatabaseAccessUtil implements java.io.Serializable { // The zipper needs to make one database call to initiate each job. // So the database connection can be closed immediately. + + private static final int JOB_TOKEN_LENGTH = 16; + // A legitimate token is 16 characters long, and is made up of + // hex digits and one dash. THERE ARE prettier ways to spell out + // this regular expression - I just wanted it to be clear what it does: + private static final String JOB_TOKEN_REGEX = "^[0-9a-f][0-9a-f]*\\-[0-9a-f][0-9a-f]*$"; + private static final String JOB_LOOKUP_QUERY = "SELECT * FROM CustomZipServiceRequest WHERE key=?"; + private static final String JOB_DELETE_QUERY = "DELETE FROM CustomZipServiceRequest WHERE key=?"; public static List lookupZipJob(String jobKey) { + // Before we do anything, it is super important to sanitize the + // supplied token - we don't want to insert anything sketchy into + // the db query below (an "injection attack"). + // java.sql PreparedStatement.setString() that we are using below + // should also be checking against an attemp to insert a sub-query. + // But better safe than sorry. + if (!validateTokenFormat(jobKey)) { + return null; // This will result in a "no such job" response. + } + Connection c = connectToDatabase(); if (c == null) { @@ -46,7 +65,7 @@ public class DatabaseAccessUtil implements java.io.Serializable { return null; } - Statement stmt; + PreparedStatement stmt; ResultSet rs; List ret = new ArrayList<>(); @@ -54,8 +73,9 @@ public class DatabaseAccessUtil implements java.io.Serializable { try { c.setAutoCommit(false); - stmt = c.createStatement(); - rs = stmt.executeQuery( "SELECT * FROM CustomZipServiceRequest WHERE key='" + jobKey +"';" ); + stmt = c.prepareStatement(JOB_LOOKUP_QUERY); + stmt.setString(1, jobKey); + rs = stmt.executeQuery(); while ( rs.next() ) { String storageLocation = rs.getString("storageLocation"); @@ -83,18 +103,21 @@ public class DatabaseAccessUtil implements java.io.Serializable { // Delete all the entries associated with the job, now that we are done // with it. - // Alternatively, the db user whose credentials the zipper is using - // may be given only read-only access to the table; and it could be the - // job of the Dataverse application to, say, automatically delete all the - // entries older than 5 min. every time it accesses the table on its side. 
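// Editor's note (illustrative only, not part of this patch): the application-side
// cleanup mentioned in the comments above would live in Dataverse itself, not in
// the zipper. Assuming the job table keeps the issue timestamp in a column named
// something like "issuetime" (a guess), it could be a single native delete that
// runs whenever new job rows are written, e.g. with an injected EntityManager em:
//
//     em.createNativeQuery("DELETE FROM CustomZipServiceRequest"
//             + " WHERE issuetime < now() - interval '5 minutes'")
//       .executeUpdate();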
try { - stmt = c.createStatement(); - stmt.executeUpdate("DELETE FROM CustomZipServiceRequest WHERE key='" + jobKey +"';"); + stmt = c.prepareStatement(JOB_DELETE_QUERY); + stmt.setString(1, jobKey); + stmt.executeUpdate(); c.commit(); } catch (Exception e) { // Not much we can or want to do, but complain in the Apache logs: - System.err.println("Failed to delete the job from the db"); + // (not even sure about printing any log messages either; the reason + // this delete failed may be because the admin chose to only give + // the zipper read-only access to the db - in which case this will + // be happening every time a job is processed. which in turn is + // ok - there is a backup cleanup mechanism for deleting older jobs + // on the application side as well). + //System.err.println("Failed to delete the job from the db"); } try { @@ -122,11 +145,20 @@ private static Connection connectToDatabase() { pguser, pgpasswd); } catch (Exception e) { - //e.printStackTrace(); - //System.err.println(e.getClass().getName()+": "+e.getMessage()); return null; } - //System.out.println("Opened database successfully"); return c; } + + private static boolean validateTokenFormat(String jobKey) { + // A legitimate token is 16 characters long, and is made up of + // hex digits and one dash. + if (jobKey == null + || jobKey.length() != JOB_TOKEN_LENGTH + || !jobKey.matches(JOB_TOKEN_REGEX)) { + return false; + } + + return true; + } } \ No newline at end of file From 3eb3976192e7bcb5acd32f4d4786a72a04f09b8c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 13:04:17 -0400 Subject: [PATCH 14/57] added a line about the Apache configuration to the installation instruction --- scripts/zipdownload/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 7ebfb05cf90..66e486be725 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -2,12 +2,16 @@ Work in progress! to build: +cd scripts/zipdownload mvn clean compile assembly:single to install: install cgi-bin/zipdownload and ZipDownloadService-v1.0.0.jar in your cgi-bin directory (/var/www/cgi-bin standard). Edit the config lines in the shell script (zipdownload) as needed. +You may need to make extra Apache configuration changes to make sure /cgi-bin/zipdownload is accessible from the outside. +For example, if this is the same Apache that's in front of your Dataverse Payara instance, you'll need to add another pass through statement to your configuration: +``ProxyPassMatch ^/cgi-bin/zipdownload !`` to activate in Dataverse: From 1cd8629a26b1acef0941bc7025fd76ebf020c7d2 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 13:06:47 -0400 Subject: [PATCH 15/57] line breaks in the readme (#6505) --- scripts/zipdownload/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 66e486be725..2a54ec15245 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -8,9 +8,12 @@ mvn clean compile assembly:single to install: install cgi-bin/zipdownload and ZipDownloadService-v1.0.0.jar in your cgi-bin directory (/var/www/cgi-bin standard). + Edit the config lines in the shell script (zipdownload) as needed. + You may need to make extra Apache configuration changes to make sure /cgi-bin/zipdownload is accessible from the outside. 
For example, if this is the same Apache that's in front of your Dataverse Payara instance, you'll need to add another pass through statement to your configuration: + ``ProxyPassMatch ^/cgi-bin/zipdownload !`` to activate in Dataverse: From 69297fb256fe4e7db1ece6e30a7f2eb5b6a0a8a5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 13:19:08 -0400 Subject: [PATCH 16/57] small addition to the guide on installation (#6505) --- doc/sphinx-guides/source/installation/advanced.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 8d61678a715..2f15666933d 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -59,10 +59,15 @@ and/or S3 bucket). Please consult the scripts/zipdownload/README.md in the Dataverse 5 source tree for more information. -To install: follow the instructions in the file above to build -``ZipDownloadService-v1.0.0.jar``. Copy it, together with the shell +To install: You can follow the instructions in the file above to build +``ZipDownloadService-v1.0.0.jar``. It will also be available, pre-built as part of the Dataverse release on GitHub. Copy it, together with the shell script scripts/zipdownload/cgi-bin/zipdownload to the cgi-bin directory of the chosen Apache server (/var/www/cgi-bin standard). +You may need to make extra Apache configuration changes to make sure /cgi-bin/zipdownload is accessible from the outside. +For example, if this is the same Apache that's in front of your Dataverse Payara instance, you will need to add another pass through statement to your configuration: + +``ProxyPassMatch ^/cgi-bin/zipdownload !`` + Edit the config lines in the shell script (zipdownload) to configure database access credentials. Do note that the executable does not need access to the entire Dataverse database. A secuirity-conscious admin From 6e2e39650f8dfb0774a87cfe2d2a07cc7b80e07b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 13:33:31 -0400 Subject: [PATCH 17/57] documents the zipper setting. (#6505) --- doc/sphinx-guides/source/installation/config.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a026c4e19ff..0bc4e040738 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2117,3 +2117,16 @@ Unlike other facets, those indexed by Date/Year are sorted chronologically by de If you don’t want date facets to be sorted chronologically, set: ``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:ChronologicalDateFacets`` + +:CustomZipDownloadServiceUrl +++++++++++++++++++++++++++++ + +The location of the "Standalone Zipper" service. If this option is specified, Dataverse will be redirecing bulk/mutli-file zip download requests to that location, instead of serving them internally. See the "Advanced" section of the Installation guide for information on how to installe the external zipper. (This is still an experimental feature, as of v5.0). 
+ +To enable redirects to the zipper installed on the same server as the main Dataverse application: + +``curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` + +To enable redirects to the zipper on a different server: + +``curl -X PUT -d 'https://zipper.example.edu/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` \ No newline at end of file From 72394a46edd4c4ef37922fd5f0409ac242a585c3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 13:39:04 -0400 Subject: [PATCH 18/57] fixes "original" always being true (#6505) --- .../edu/harvard/iq/dataverse/FileDownloadServiceBean.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 2174eb3094f..e8be6ac087f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -97,10 +97,12 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo } public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookResponse, Boolean doNotSaveGuestbookRecord){ - boolean original = true; + if (guestbookResponse == null || guestbookResponse.getSelectedFileIds() == null) { return; } + + boolean original = "original".equals(guestbookResponse.getFileFormat()); // Let's intercept the case where a multiple download method was called, // with only 1 file on the list. We'll treat it like a single file download @@ -165,7 +167,7 @@ public void writeGuestbookAndStartBatchDownload(GuestbookResponse guestbookRespo redirectToCustomZipDownloadService(customZipDownloadUrl, zipServiceKey); } else { // Use the "normal" /api/access/datafiles/ API: - redirectToBatchDownloadAPI(guestbookResponse.getSelectedFileIds(), "original".equals(guestbookResponse.getFileFormat())); + redirectToBatchDownloadAPI(guestbookResponse.getSelectedFileIds(),original); } } From 96c37086ce3f8f66b510cdf95f72bbe994eec127 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 14:18:26 -0400 Subject: [PATCH 19/57] removed unnecessary repos from pom.xml; a few more words in the advanced guide; #6505 --- .../source/installation/advanced.rst | 20 ++++++++++-------- scripts/zipdownload/pom.xml | 21 ------------------- .../engine/command/DataverseRequest.java | 1 + 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 2f15666933d..6471eb8d1ab 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -43,18 +43,20 @@ Standalone "Zipper" Service Tool ++++++++++++++++++++++++++++++++ As of Dataverse v5.0 we offer an experimental optimization for the -multi-file, download-as-zip functionality. If this option is enabled, -instead of enforcing size limits, we attempt to serve all the files -that the user requested (that they are authorized to download), but -the request is redirected to a standalone zipper service running as a -cgi-bin executable under Apache. Thus moving these potentially +multi-file, download-as-zip functionality. 
If this option +(``:CustomZipDownloadServiceUrl``) is enabled, instead of enforcing +the size limit on multi-file zipped downloads (as normally specified +by the option ``:ZipDownloadLimit``), we attempt to serve all the +files that the user requested (that they are authorized to download), +but the request is redirected to a standalone zipper service running +as a cgi-bin executable under Apache. Thus moving these potentially long-running jobs completely outside the Application Server (Payara); and preventing worker threads from becoming locked serving them. Since zipping is also a CPU-intensive task, it is possible to have this -service running on a different host system, thus freeing the cycles on -the main Application Server. (The system running the service needs to -have access to the database as well as to the storage filesystem, -and/or S3 bucket). +service running on a different host system, freeing the cycles on the +main Application Server. (The system running the service needs to have +access to the database as well as to the storage filesystem, and/or S3 +bucket). Please consult the scripts/zipdownload/README.md in the Dataverse 5 source tree for more information. diff --git a/scripts/zipdownload/pom.xml b/scripts/zipdownload/pom.xml index 60a5a5814ab..256062a46e2 100644 --- a/scripts/zipdownload/pom.xml +++ b/scripts/zipdownload/pom.xml @@ -29,27 +29,6 @@ https://repo1.maven.org/maven2 default - - prime-repo - PrimeFaces Maven Repository - https://repository.primefaces.org - default - - - dataone.org - https://maven.dataone.org - - true - - - true - - - - dvn.private - Local repository for hosting jars not available from network repositories. - file://${project.basedir}/local_lib - diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java index 1384b7aef2e..1b75b040d48 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java @@ -91,6 +91,7 @@ public DataverseRequest(User aUser, HttpServletRequest aHttpServletRequest) { if (index >= 0) { ip = ip.substring(index + 1); } + ip=ip.trim(); /* * We should have a valid, single IP address string here. The IpAddress.valueOf * call will throw an exception if it can't be parsed into a valid address (e.g. From e01c213d98c802dcb12ebe6ebe6a34abe5f74369 Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 14:22:33 -0400 Subject: [PATCH 20/57] Update doc/sphinx-guides/source/installation/advanced.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/advanced.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/advanced.rst b/doc/sphinx-guides/source/installation/advanced.rst index 6471eb8d1ab..7da559a619c 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -72,7 +72,7 @@ For example, if this is the same Apache that's in front of your Dataverse Payara Edit the config lines in the shell script (zipdownload) to configure database access credentials. Do note that the executable does not need -access to the entire Dataverse database. A secuirity-conscious admin +access to the entire Dataverse database. A security-conscious admin can create a dedicated database user with access to just one table: ``CUSTOMZIPSERVICEREQUEST``. 
@@ -80,4 +80,3 @@ to activate in Dataverse:: curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl - From 9e42aec35af9795ec43aa758a8b988612f3148d1 Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 14:23:52 -0400 Subject: [PATCH 21/57] Update scripts/zipdownload/README.md Co-authored-by: Philip Durbin --- scripts/zipdownload/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 2a54ec15245..e6297f35623 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -80,7 +80,7 @@ implementation would rely on including all the file information WITH the redirect itself, with some pre-signed URL mechanism to make it secure. Mechanisms for pre-signing requests are readily available and simple to implement. We could go with something similar to how S3 -presigns their access URLs. Jim Meyers has already speced out how this +presigns their access URLs. Jim Myers has already speced out how this could be done for Dataverse access urls in a design document (https://docs.google.com/document/d/1J8GW6zi-vSRKZdtFjLpmYJ2SUIcIkAEwHkP4q1fxL-s/edit#). (Basically, you hash the product of your request parameters, the issue timestamp From 6100ed62d70621b4e453161e125fe15d9b72a106 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 26 Jun 2020 14:31:25 -0400 Subject: [PATCH 22/57] style/grammar #6505 --- scripts/zipdownload/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index e6297f35623..8c5c9b3c3be 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -23,6 +23,8 @@ curl -X PUT -d '/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/: How it works: ============= +(This is an ongoing design discussion - other developers are welcome to contribute) + The goal: to move this potentially long-running task out of the Application Server. This is the sole focus of this implementation. It does not attempt to make it faster. @@ -36,7 +38,7 @@ the application accesses the raw bytes. The exact location of the bottleneck is in a sense irrelevant. On a very fast system, with the files stored on a very fast local RAID, the bottleneck for most users will likely shift to the speed of their internet connection to the -Dataverse. Bottom line is, downloading this multi-file compressed +server. The bottom line is, downloading this multi-file compressed stream will take a long time no matter how you slice it. So this hack addresses it by moving the task outside Payara, where it's not going to hog any threads. From aaaa035bf9411758a32cb2bcafb7fb7038295f0f Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 14:37:35 -0400 Subject: [PATCH 23/57] Update scripts/zipdownload/README.md Co-authored-by: Philip Durbin --- scripts/zipdownload/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 8c5c9b3c3be..0747ad9f42e 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -53,7 +53,7 @@ generating a zipped stream from a certain offset. The implementation is a hack. It relies on direct access to everything - storage locations (filesystem or S3) and the database. 
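(Editor's aside, illustrative only: "direct access" here means the zipper opens the stored bytes itself, the way the DirectAccessUtil used by ZipDownloadService does, instead of asking the Dataverse application for them. The storage-location prefixes and the "s3://bucket/objectKey" layout below are assumptions for the sketch, not necessarily the exact strings Dataverse writes to the job table.)

    import com.amazonaws.services.s3.AmazonS3;
    import com.amazonaws.services.s3.AmazonS3ClientBuilder;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class DirectAccessSketch {
        // Open the raw bytes for a stored file without going through the application.
        static InputStream open(String storageLocation) throws Exception {
            if (storageLocation.startsWith("file://")) {
                // plain filesystem storage
                return Files.newInputStream(Paths.get(storageLocation.substring("file://".length())));
            }
            if (storageLocation.startsWith("s3://")) {
                // assumed "s3://bucket/objectKey"; credentials come from the standard AWS chain
                String rest = storageLocation.substring("s3://".length());
                String bucket = rest.substring(0, rest.indexOf('/'));
                String key = rest.substring(rest.indexOf('/') + 1);
                AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
                return s3.getObject(bucket, key).getObjectContent();
            }
            throw new IllegalArgumentException("unsupported storage location: " + storageLocation);
        }
    }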
-There are no network calls between the Application and the zipper (an +There are no network calls between the application (Dataverse) and the zipper (an implementation relying on such a call was discussed early on). Dataverse issues a "job key" and sends the user's browser to the zipper (to, for ex., /cgi-bin/zipdownload?) instead of From c5cca50151d3c0a4c93679858e4c8a611cc2fee3 Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 14:39:04 -0400 Subject: [PATCH 24/57] Update scripts/zipdownload/README.md Co-authored-by: Philip Durbin --- scripts/zipdownload/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 0747ad9f42e..2a509da29f3 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -60,7 +60,7 @@ zipper (to, for ex., /cgi-bin/zipdownload?) instead of /api/access/datafiles/). To authorize the zipdownload for the "job key", and inform the zipper on which files to zip and where to find them, the application relies on a database table, that the -zipper also has access too. In other words, there is a saved state +zipper also has access to. In other words, there is a saved state information associated with each zipped download request. Zipper may be given a limited database access - for example, via a user authorized to access that one table only. After serving the files, the From 1d4b83fb1755b1d0c839298e878c6bddc77cda0f Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 26 Jun 2020 14:39:38 -0400 Subject: [PATCH 25/57] Update scripts/zipdownload/README.md Co-authored-by: Philip Durbin --- scripts/zipdownload/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zipdownload/README.md b/scripts/zipdownload/README.md index 2a509da29f3..2e18405fb92 100644 --- a/scripts/zipdownload/README.md +++ b/scripts/zipdownload/README.md @@ -87,7 +87,7 @@ could be done for Dataverse access urls in a design document (https://docs.google.com/document/d/1J8GW6zi-vSRKZdtFjLpmYJ2SUIcIkAEwHkP4q1fxL-s/edit#). (Basically, you hash the product of your request parameters, the issue timestamp AND some "secret" - like the user's API key - and send the resulting -hash along with the request. Tempering with any of the parameters, or +hash along with the request. Tampering with any of the parameters, or trying to extend the life span of the request, becomes impossible, because it would invalidate the hash). What stopped me from trying something like that was the sheer size of information that would need From d34eccaccc5411d115fb23ec0605b537d8821205 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jun 2020 15:03:00 -0400 Subject: [PATCH 26/57] typo --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 0bc4e040738..3de1596cca9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2121,7 +2121,7 @@ If you don’t want date facets to be sorted chronologically, set: :CustomZipDownloadServiceUrl ++++++++++++++++++++++++++++ -The location of the "Standalone Zipper" service. If this option is specified, Dataverse will be redirecing bulk/mutli-file zip download requests to that location, instead of serving them internally. 
See the "Advanced" section of the Installation guide for information on how to installe the external zipper. (This is still an experimental feature, as of v5.0). +The location of the "Standalone Zipper" service. If this option is specified, Dataverse will be redirecing bulk/mutli-file zip download requests to that location, instead of serving them internally. See the "Advanced" section of the Installation guide for information on how to install the external zipper. (This is still an experimental feature, as of v5.0). To enable redirects to the zipper installed on the same server as the main Dataverse application: @@ -2129,4 +2129,4 @@ To enable redirects to the zipper installed on the same server as the main Datav To enable redirects to the zipper on a different server: -``curl -X PUT -d 'https://zipper.example.edu/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` \ No newline at end of file +``curl -X PUT -d 'https://zipper.example.edu/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` From 55c2c8918c384404c1dec008d6c60d03e71897de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felker=20Tam=C3=A1s?= Date: Mon, 29 Jun 2020 10:33:08 +0200 Subject: [PATCH 27/57] Fix for NPE in test. --- .../authorization/providers/oauth2/impl/OrcidOAuth2AP.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java index 24d13eedc29..99534a84bd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java @@ -55,7 +55,11 @@ public class OrcidOAuth2AP extends AbstractOAuth2AuthenticationProvider { public static final String PROVIDER_ID_SANDBOX = "orcid-sandbox"; public OrcidOAuth2AP(String clientId, String clientSecret, String userEndpoint) { - String s = userEndpoint.startsWith("https://pub") ? "/authenticate" : "/read-limited"; + + String s = null; + if(userEndpoint != null){ + s = userEndpoint.startsWith("https://pub") ? "/authenticate" : "/read-limited"; + } scope = Arrays.asList(s); this.clientId = clientId; this.clientSecret = clientSecret; From 8da201b42a5e61e15a8835b2b5d82c6f06c27fbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felker=20Tam=C3=A1s?= Date: Mon, 29 Jun 2020 11:10:21 +0200 Subject: [PATCH 28/57] Oauth documentation is extended with ORCID public API option. 
--- .../root/auth-providers/{orcid.json => orcid-member.json} | 0 .../files/root/auth-providers/orcid-public.json | 8 ++++++++ doc/sphinx-guides/source/installation/oauth2.rst | 5 +++-- 3 files changed, 11 insertions(+), 2 deletions(-) rename doc/sphinx-guides/source/_static/installation/files/root/auth-providers/{orcid.json => orcid-member.json} (100%) create mode 100644 doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid-public.json diff --git a/doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid.json b/doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid-member.json similarity index 100% rename from doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid.json rename to doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid-member.json diff --git a/doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid-public.json b/doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid-public.json new file mode 100644 index 00000000000..8a0c7bbe6c5 --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/files/root/auth-providers/orcid-public.json @@ -0,0 +1,8 @@ +{ + "id":"orcid-public", + "factoryAlias":"oauth2", + "title":"ORCID", + "subtitle":"", + "factoryData":"type: orcid | userEndpoint: https://pub.orcid.org/v2.1/{ORCID}/person | clientId: FIXME | clientSecret: FIXME", + "enabled":true +} diff --git a/doc/sphinx-guides/source/installation/oauth2.rst b/doc/sphinx-guides/source/installation/oauth2.rst index 4484ca72168..4c8e7041c75 100644 --- a/doc/sphinx-guides/source/installation/oauth2.rst +++ b/doc/sphinx-guides/source/installation/oauth2.rst @@ -26,7 +26,7 @@ Identity Provider Side Obtain Client ID and Client Secret ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Before OAuth providers will release information about their users (first name, last name, etc.) to your Dataverse installation, you must request a "Client ID" and "Client Secret" from them. In the case of GitHub and Google, this is as simple as clicking a few buttons and there is no cost associated with using their authentication service. ORCID and Microsoft, on the other hand, do not have an automated system for requesting these credentials, and it is not free to use these authentication services. +Before OAuth providers will release information about their users (first name, last name, etc.) to your Dataverse installation, you must request a "Client ID" and "Client Secret" from them. In the case of GitHub and Google, this is as simple as clicking a few buttons and there is no cost associated with using their authentication service. ORCID has a free public API that can also be used for authentication and accessing public data. ORCID member API and Microsoft, on the other hand, do not have an automated system for requesting these credentials, and it is not free to use them. URLs to help you request a Client ID and Client Secret from the providers supported by Dataverse are provided below. 
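(Editor's aside: on the code side, the difference between the public and member ORCID options comes down to the OAuth scope that the OrcidOAuth2AP constructor picks from the user endpoint, as shown in the earlier patches in this series. A rough illustration follows; the member-API endpoint is an assumption included for contrast, since only the public endpoint appears in orcid-public.json.)

    import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.OrcidOAuth2AP;

    public class OrcidScopeIllustration {
        public static void main(String[] args) {
            // endpoint from orcid-public.json; the constructor selects the "/authenticate" scope
            OrcidOAuth2AP publicOrcid = new OrcidOAuth2AP("FIXME", "FIXME",
                    "https://pub.orcid.org/v2.1/{ORCID}/person");
            // assumed member-API endpoint; anything not starting with "https://pub" gets "/read-limited"
            OrcidOAuth2AP memberOrcid = new OrcidOAuth2AP("FIXME", "FIXME",
                    "https://api.orcid.org/v2.1/{ORCID}/person");
        }
    }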
For all of these providers, it's a good idea to request the Client ID and Client secret using a generic account, perhaps the one that's associated with the ``:SystemEmail`` you've configured for Dataverse, rather than your own personal Microsoft Azure AD, ORCID, GitHub, or Google account: @@ -51,7 +51,8 @@ As explained under "Auth Modes" in the :doc:`config` section, available authenti We will ``POST`` a JSON file containing the Client ID and Client Secret to this ``authenticationProviders`` API endpoint to add another authentication provider. As a starting point, you'll want to download the JSON template file matching the provider you're setting up: -- :download:`orcid.json <../_static/installation/files/root/auth-providers/orcid.json>` +- :download:`orcid-public.json <../_static/installation/files/root/auth-providers/orcid-public.json>` +- :download:`orcid-member.json <../_static/installation/files/root/auth-providers/orcid-member.json>` - :download:`github.json <../_static/installation/files/root/auth-providers/github.json>` - :download:`google.json <../_static/installation/files/root/auth-providers/google.json>` - :download:`microsoft.json <../_static/installation/files/root/auth-providers/microsoft.json>` From 9d7c843e4c9b667d0c51a19796af319b9e37d4ac Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 29 Jun 2020 11:10:43 -0400 Subject: [PATCH 29/57] remove cost, link to ORCID APIs public, member #7025 --- doc/sphinx-guides/source/installation/oauth2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/oauth2.rst b/doc/sphinx-guides/source/installation/oauth2.rst index 4c8e7041c75..e185623d3c6 100644 --- a/doc/sphinx-guides/source/installation/oauth2.rst +++ b/doc/sphinx-guides/source/installation/oauth2.rst @@ -26,11 +26,11 @@ Identity Provider Side Obtain Client ID and Client Secret ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Before OAuth providers will release information about their users (first name, last name, etc.) to your Dataverse installation, you must request a "Client ID" and "Client Secret" from them. In the case of GitHub and Google, this is as simple as clicking a few buttons and there is no cost associated with using their authentication service. ORCID has a free public API that can also be used for authentication and accessing public data. ORCID member API and Microsoft, on the other hand, do not have an automated system for requesting these credentials, and it is not free to use them. +Before OAuth providers will release information about their users (first name, last name, etc.) to your Dataverse installation, you must request a "Client ID" and "Client Secret" from them. In many cases you can use providers' automated system to request these credentials, but if not, contact the provider for assistance. URLs to help you request a Client ID and Client Secret from the providers supported by Dataverse are provided below. 
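// Editor's note (hypothetical wiring, not visible in this excerpt): the two methods
// above are meant to back the new "Download All" entries under the dataset page's
// Access button, so the dataset.xhtml changes in this same patch would invoke them
// from command links roughly like:
//
//     <p:commandLink action="#{DatasetPage.validateAllFilesForDownloadArchival()}"
//                    update="@form" />
//
// The exact component, update target and surrounding markup are assumptions here.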
For all of these providers, it's a good idea to request the Client ID and Client secret using a generic account, perhaps the one that's associated with the ``:SystemEmail`` you've configured for Dataverse, rather than your own personal Microsoft Azure AD, ORCID, GitHub, or Google account: -- ORCID: https://orcid.org/content/register-client-application-production-trusted-party +- ORCID: https://orcid.org/content/register-client-application-0 - Microsoft: https://docs.microsoft.com/en-us/azure/active-directory/develop/v1-protocols-oauth-code - GitHub: https://github.com/settings/applications/new via https://developer.github.com/v3/oauth/ - Google: https://console.developers.google.com/projectselector/apis/credentials via https://developers.google.com/identity/protocols/OAuth2WebServer (pick "OAuth client ID") From c1a612628e331cdaa29753a8c6369fc7c7bcd9ae Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 30 Jun 2020 13:12:39 -0400 Subject: [PATCH 30/57] add download all buttons under access button #6118 --- .../edu/harvard/iq/dataverse/DatasetPage.java | 21 ++++++++++++-- src/main/webapp/dataset.xhtml | 28 ++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index cba68fb1ff7..46dea68c7b3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2886,8 +2886,25 @@ public List getSelectedNonDownloadableFiles() { public void setSelectedNonDownloadableFiles(List selectedNonDownloadableFiles) { this.selectedNonDownloadableFiles = selectedNonDownloadableFiles; } - - + + public void validateAllFilesForDownloadArchival() { + selectAllFiles(); + boolean guestbookRequired = isDownloadPopupRequired(); + boolean downloadOriginal = false; + validateFilesForDownload(guestbookRequired, downloadOriginal); + } + + /** + * Can result in "requested optional service" error. For non-tabular files + * it's safer to use validateAllFilesForDownloadArchival. + */ + public void validateAllFilesForDownloadOriginal() { + selectAllFiles(); + boolean guestbookRequired = isDownloadPopupRequired(); + boolean downloadOriginal = true; + validateFilesForDownload(guestbookRequired, downloadOriginal); + } + public void validateFilesForDownload(boolean guestbookRequired, boolean downloadOriginal){ setSelectedDownloadableFiles(new ArrayList<>()); setSelectedNonDownloadableFiles(new ArrayList<>()); diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 658b429768e..b4ff7347946 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -31,11 +31,7 @@ and !permissionsWrapper.canIssuePublishDatasetCommand(DatasetPage.dataset)}"/> - - - @@ -143,17 +139,29 @@