From 2dc84db8f202010db091f987aacd92113731ce8f Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 20 Dec 2023 17:38:57 +0100 Subject: [PATCH 001/622] #9317 - Allowing to delete saved search --- .../iq/dataverse/api/SavedSearches.java | 14 +++-- .../savedsearch/SavedSearchServiceBean.java | 62 ++++++++++++++++--- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java b/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java index 5d0365d022e..cc1d7483c29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java @@ -173,16 +173,18 @@ public Response add(JsonObject body) { @DELETE @Path("{id}") - public Response delete(@PathParam("id") long doomedId) { - boolean disabled = true; - if (disabled) { - return error(BAD_REQUEST, "Saved Searches can not safely be deleted because links can not safely be deleted. See https://github.com/IQSS/dataverse/issues/1364 for details."); - } + public Response delete(@PathParam("id") long doomedId, @QueryParam("unlink") boolean unlink) { SavedSearch doomed = savedSearchSvc.find(doomedId); if (doomed == null) { return error(NOT_FOUND, "Could not find saved search id " + doomedId); } - boolean wasDeleted = savedSearchSvc.delete(doomedId); + boolean wasDeleted; + try { + wasDeleted = savedSearchSvc.delete(doomedId, unlink); + } catch (Exception e) { + return error(INTERNAL_SERVER_ERROR, "Problem while trying to unlink links of saved search id " + doomedId); + } + if (wasDeleted) { return ok(Json.createObjectBuilder().add("Deleted", doomedId)); } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java index 7fc2bdf79a3..eded3a7af8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java @@ -2,29 +2,28 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetLinkingDataverse; +import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseLinkingDataverse; +import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.search.SearchServiceBean; -import edu.harvard.iq.dataverse.search.SolrQueryResponse; -import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; +import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.LinkDataverseCommand; import edu.harvard.iq.dataverse.search.SearchException; import edu.harvard.iq.dataverse.search.SearchFields; +import edu.harvard.iq.dataverse.search.SearchServiceBean; +import 
edu.harvard.iq.dataverse.search.SolrQueryResponse; +import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.search.SortBy; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.Schedule; import jakarta.ejb.Stateless; @@ -39,6 +38,12 @@ import jakarta.persistence.TypedQuery; import jakarta.servlet.http.HttpServletRequest; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + @Stateless @Named public class SavedSearchServiceBean { @@ -50,6 +55,10 @@ public class SavedSearchServiceBean { @EJB DvObjectServiceBean dvObjectService; @EJB + protected DatasetLinkingServiceBean dsLinkingService; + @EJB + protected DataverseLinkingServiceBean dvLinkingService; + @EJB EjbDataverseEngine commandEngine; @EJB SystemConfig systemConfig; @@ -101,11 +110,15 @@ public SavedSearch add(SavedSearch toPersist) { return persisted; } - public boolean delete(long id) { + public boolean delete(long id, boolean unlink) throws SearchException, CommandException { SavedSearch doomed = find(id); boolean wasDeleted = false; if (doomed != null) { System.out.println("deleting saved search id " + doomed.getId()); + if(unlink) { + DataverseRequest dataverseRequest = new DataverseRequest(doomed.getCreator(), getHttpServletRequest()); + unLinksForSingleSavedSearch(dataverseRequest, doomed); + } em.remove(doomed); em.flush(); wasDeleted = true; @@ -240,6 +253,37 @@ public JsonObjectBuilder makeLinksForSingleSavedSearch(DataverseRequest dvReq, S return response; } + public void unLinksForSingleSavedSearch(DataverseRequest dvReq, SavedSearch savedSearch) throws SearchException, CommandException { + logger.info("UNLINK SAVED SEARCH (" + savedSearch.getId() + ") START search and unlink process"); + Date start = new Date(); + Dataverse linkingDataverse = savedSearch.getDefinitionPoint(); + + SolrQueryResponse queryResponse = findHits(savedSearch); + for (SolrSearchResult solrSearchResult : queryResponse.getSolrSearchResults()) { + + DvObject dvObjectThatDefinitionPointWillLinkTo = dvObjectService.findDvObject(solrSearchResult.getEntityId()); + if (dvObjectThatDefinitionPointWillLinkTo == null) { + continue; + } + + if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataverse()) { + Dataverse linkedDataverse = (Dataverse) dvObjectThatDefinitionPointWillLinkTo; + DataverseLinkingDataverse dvld = dvLinkingService.findDataverseLinkingDataverse(linkedDataverse.getId(), linkingDataverse.getId()); + if(dvld != null) { + Dataverse dv = commandEngine.submitInNewTransaction(new DeleteDataverseLinkingDataverseCommand(dvReq, linkingDataverse, dvld, true)); + } + } else if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataset()) { + Dataset linkedDataset = (Dataset) dvObjectThatDefinitionPointWillLinkTo; + DatasetLinkingDataverse dsld = dsLinkingService.findDatasetLinkingDataverse(linkedDataset.getId(), linkingDataverse.getId()); + if(dsld != null) { + Dataset ds = commandEngine.submitInNewTransaction(new DeleteDatasetLinkingDataverseCommand(dvReq, linkedDataset, dsld, true)); + } + } + } + + logger.info("UNLINK SAVED SEARCH (" + savedSearch.getId() + ") total time in ms: " + (new Date().getTime() - start.getTime())); + } + private SolrQueryResponse findHits(SavedSearch savedSearch) throws SearchException { String sortField = 
SearchFields.TYPE; // first return dataverses, then datasets String sortOrder = SortBy.DESCENDING; From ac80e0a3f2c085d6831d8f3859a123da80745741 Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Fri, 5 Jan 2024 17:26:44 +0100 Subject: [PATCH 002/622] #9317 - Adding release note --- doc/release-notes/9317-delete-saved-search.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/release-notes/9317-delete-saved-search.md diff --git a/doc/release-notes/9317-delete-saved-search.md b/doc/release-notes/9317-delete-saved-search.md new file mode 100644 index 00000000000..5fb6edd3642 --- /dev/null +++ b/doc/release-notes/9317-delete-saved-search.md @@ -0,0 +1,4 @@ +### Saved search deletion + +Saved searches can now be removed using the API endpoint `/api/admin/savedsearches/$id`. See PR #10198. +This is reflected in the [Saved Search Native API section](https://guides.dataverse.org/en/latest/api/native-api.html#saved-search) of the Guide. \ No newline at end of file From e1bf3c35f2c085d6831d8f3859a123da80745741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konrad=20Per=C5=82owski?= Date: Mon, 15 Jan 2024 12:45:02 +0100 Subject: [PATCH 003/622] Add CrossRef provider logic --- .../source/installation/config.rst | 9 + pom.xml | 5 + .../iq/dataverse/CrossRefRESTfullClient.java | 118 +++++++ .../dataverse/DOICrossRefRegisterService.java | 313 ++++++++++++++++++ .../iq/dataverse/DOICrossRefServiceBean.java | 117 +++++++ .../iq/dataverse/EjbDataverseEngine.java | 8 + .../iq/dataverse/GlobalIdServiceBean.java | 3 +- .../engine/command/CommandContext.java | 24 +- .../iq/dataverse/settings/JvmSettings.java | 11 +- .../dataverse/crossref_metadata_template.xml | 29 ++ .../dataverse/engine/TestCommandContext.java | 5 + 11 files changed, 619 insertions(+), 23 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java create mode 100644 src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a7d7905ca4a..e7ba624222e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -245,6 +245,15 @@ this provider. 
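A quick usage sketch for the endpoint described in the release note above (the hostname and database id are illustrative; the ``unlink`` option comes from the first patch in this series):

    # delete saved search 42; add ?unlink=true to also remove the links it created
    curl -X DELETE "http://localhost:8080/api/admin/savedsearches/42?unlink=true"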
- :ref:`dataverse.pid.ezid.username` - :ref:`dataverse.pid.ezid.password` +**JVM Options for CrossRef:** + +- :ref:`dataverse.pid.crossref.url` +- :ref:`dataverse.pid.crossref.rest-api-url` +- :ref:`dataverse.pid.crossref.username` +- :ref:`dataverse.pid.crossref.password` +- :ref:`dataverse.pid.crossref.depositor` +- :ref:`dataverse.pid.crossref.depositor-email` + **Database Settings:** - :ref:`:DoiProvider <:DoiProvider>` diff --git a/pom.xml b/pom.xml index 7c12a45135c..72ef3391524 100644 --- a/pom.xml +++ b/pom.xml @@ -653,6 +653,11 @@ 3.2.0 test + <dependency> + <groupId>org.apache.httpcomponents</groupId> + <artifactId>fluent-hc</artifactId> + <version>4.5.14</version> + </dependency> diff --git a/src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java new file mode 100644 index 00000000000..4b6728eca57 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java @@ -0,0 +1,118 @@ +package edu.harvard.iq.dataverse; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.fluent.Request; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.mime.HttpMultipartMode; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class CrossRefRESTfullClient implements Closeable { + + private static final Logger logger = Logger.getLogger(CrossRefRESTfullClient.class.getCanonicalName()); + + private final String url; + private final String apiUrl; + private final String username; + private final String password; + private final CloseableHttpClient httpClient; + private final HttpClientContext context; + private final String encoding = "utf-8"; + + public CrossRefRESTfullClient(String url, String apiUrl, String username, String password) { + this.url = url; + this.apiUrl = apiUrl; + this.username = username; + this.password = password; + try { + context = HttpClientContext.create(); + CredentialsProvider credsProvider = new BasicCredentialsProvider(); + credsProvider.setCredentials(new AuthScope(null, -1), + new UsernamePasswordCredentials(username, password)); + context.setCredentialsProvider(credsProvider); + + httpClient = HttpClients.createDefault(); + } catch (Exception ioe) { + close(); + logger.log(Level.SEVERE, "Failed to init client", ioe); + throw new RuntimeException("Failed to init client", ioe); + } + } + + public void close() { + if (this.httpClient != null) { + try { + httpClient.close(); + } catch (IOException io) { + logger.warning("IOException closing httpClient: " + io.getMessage()); + } + } + } + + public String getMetadata(String doi) { + HttpGet httpGet = new HttpGet(this.apiUrl + "/works/" + doi); + httpGet.setHeader("Accept", "application/json"); + try { + HttpResponse response = httpClient.execute(httpGet); + String data = EntityUtils.toString(response.getEntity(), encoding); + if (response.getStatusLine().getStatusCode() != 200) { + String errMsg = 
"Response from getMetadata: " + response.getStatusLine().getStatusCode() + ", " + data; + logger.info(errMsg); + throw new RuntimeException(errMsg); + } + return data; + } catch (IOException ioe) { + logger.info("IOException when get metadata"); + throw new RuntimeException("IOException when get metadata", ioe); + } + } + + public String postMetadata(String xml) throws IOException { + HttpEntity entity = MultipartEntityBuilder.create() + .addTextBody("operation", "doMDUpload") + .addTextBody("login_id", username) + .addTextBody("login_passwd", password) + .addBinaryBody("fname", xml.getBytes(StandardCharsets.UTF_8), ContentType.APPLICATION_XML, "metadata.xml") + .setMode(HttpMultipartMode.BROWSER_COMPATIBLE) + .build(); + HttpResponse response = Request.Post(url + "/servlet/deposit") + .body(entity) + .setHeader("Accept", "*/*") + .execute().returnResponse(); + + String data = EntityUtils.toString(response.getEntity(), encoding); + if (response.getStatusLine().getStatusCode() != 200) { + String errMsg = "Response from postMetadata: " + response.getStatusLine().getStatusCode() + ", " + data; + logger.info(errMsg); + throw new IOException(errMsg); + } + return data; + } + + public boolean testDOIExists(String doi) throws IOException { + HttpGet httpGet = new HttpGet(this.apiUrl + "/works/" + doi); + httpGet.setHeader("Accept", "application/json"); + HttpResponse response = httpClient.execute(httpGet); + if (response.getStatusLine().getStatusCode() != 200) { + EntityUtils.consumeQuietly(response.getEntity()); + return false; + } + EntityUtils.consumeQuietly(response.getEntity()); + return true; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java new file mode 100644 index 00000000000..6e8ed20d570 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java @@ -0,0 +1,313 @@ +package edu.harvard.iq.dataverse; + +import com.fasterxml.jackson.databind.ObjectMapper; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import org.apache.commons.text.StringEscapeUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.SystemConfig.getDataverseSiteUrlStatic; + +@Stateless +public class DOICrossRefRegisterService { + private static final Logger logger = Logger.getLogger(DOICrossRefRegisterService.class.getCanonicalName()); + + @EJB + DataverseServiceBean dataverseService; + + private CrossRefRESTfullClient client = null; + + private CrossRefRESTfullClient getClient() { + if (client == null) { + client = new CrossRefRESTfullClient( + JvmSettings.CROSSREF_URL.lookup(), + JvmSettings.CROSSREF_REST_API_URL.lookup(), + JvmSettings.CROSSREF_USERNAME.lookup(), + JvmSettings.CROSSREF_PASSWORD.lookup() + ); + } + return client; + } + + public boolean testDOIExists(String identifier) { + boolean doiExists; + try { + CrossRefRESTfullClient client = getClient(); + doiExists = client.testDOIExists(identifier.substring(identifier.indexOf(":") + 1)); + } catch (Exception e) { + logger.log(Level.INFO, identifier, e); + return false; + } + return doiExists; + } + + public HashMap getMetadata(String identifier) throws IOException 
{ + HashMap metadata = new HashMap<>(); + try { + CrossRefRESTfullClient client = getClient(); + String jsonMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); + Map mappedJson = new ObjectMapper().readValue(jsonMetadata, HashMap.class); + logger.log(Level.FINE, jsonMetadata); + metadata.put("_status", mappedJson.get("status").toString()); + } catch (RuntimeException e) { + logger.log(Level.INFO, identifier, e); + } + return metadata; + } + + public String reserveIdentifier(String identifier, DvObject dvObject) throws IOException { + logger.info("Crossref reserveIdentifier"); + String xmlMetadata = getMetadataFromDvObject(identifier, dvObject); + + CrossRefRESTfullClient client = getClient(); + return client.postMetadata(xmlMetadata); + } + + public void modifyIdentifier(String identifier, DvObject dvObject) throws IOException { + logger.info("Crossref modifyIdentifier"); + String xmlMetadata = getMetadataFromDvObject(identifier, dvObject); + + CrossRefRESTfullClient client = getClient(); + client.postMetadata(xmlMetadata); + } + + public String getMetadataFromDvObject(String identifier, DvObject dvObject) { + Dataset dataset; + + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else { + dataset = (Dataset) dvObject.getOwner(); + } + + CrossRefMetadataTemplate metadataTemplate = new CrossRefMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + metadataTemplate.setDepositor(JvmSettings.CROSSREF_DEPOSITOR.lookup()); + metadataTemplate.setDepositorEmail(JvmSettings.CROSSREF_DEPOSITOR_EMAIL.lookup()); + metadataTemplate.setInstitution(dataverseService.getRootDataverseName()); + + String title = dvObject.getCurrentName(); + if (dvObject.isInstanceofDataFile()) { + //Note file title is not currently escaped the way the dataset title is, so adding it here. 
+ title = StringEscapeUtils.escapeXml10(title); + } + + if (title.isEmpty() || title.equals(DatasetField.NA_VALUE)) { + title = AbstractGlobalIdServiceBean.UNAVAILABLE; + } + + metadataTemplate.setTitle(title); + + return metadataTemplate.generateXML(); + } +} + +class CrossRefMetadataTemplate { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.edu.harvard.iq.dataverse.CrossRefMetadataTemplate"); + private static String template; + + static { + try (InputStream in = CrossRefMetadataTemplate.class.getResourceAsStream("crossref_metadata_template.xml")) { + template = CrossRefFileUtil.readAndClose(in, "utf-8"); + } catch (Exception e) { + logger.log(Level.SEVERE, "crossref metadata template load error"); + logger.log(Level.SEVERE, "String " + e); + logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); + logger.log(Level.SEVERE, "cause " + e.getCause()); + logger.log(Level.SEVERE, "message " + e.getMessage()); + } + } + + private final String timestamp = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + private String institution; + private String depositor; + private String depositorEmail; + private String databaseTitle; + private String identifier; + private String title; + private final String baseUrl = getDataverseSiteUrlStatic(); + private List authors; + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + public CrossRefMetadataTemplate() { + } + + public String generateXML() { + String xmlMetadata = template.replace("${depositor}", depositor) + .replace("${depositorEmail}", depositorEmail) + .replace("${title}", title) + .replace("${institution}", institution) + .replace("${batchId}", identifier + " " + timestamp) + .replace("${timestamp}", timestamp); + + StringBuilder datasetElement = new StringBuilder(); + datasetElement.append(""); + + StringBuilder contributorsElement = new StringBuilder(); + if (authors != null && !authors.isEmpty()) { + contributorsElement.append(""); + for (DatasetAuthor author : authors) { + contributorsElement.append(""); + contributorsElement.append(author.getName().getDisplayValue()); + contributorsElement.append(""); + contributorsElement.append(author.getName().getDisplayValue()); + contributorsElement.append(""); + + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + contributorsElement.append("") + .append(author.getAffiliation().getDisplayValue()) + .append(""); + } + + if (author.getIdType() != null && + author.getIdValue() != null && + !author.getIdType().isEmpty() && + !author.getIdValue().isEmpty() && + author.getAffiliation() != null && + !author.getAffiliation().getDisplayValue().isEmpty()) { + if (author.getIdType().equals("ORCID")) { + contributorsElement.append("").append("https://orcid.org/").append(author.getIdValue()).append(""); + } + if (author.getIdType().equals("ISNI")) { + contributorsElement.append("").append(author.getIdValue()).append(""); + } + if (author.getIdType().equals("LCNA")) { + contributorsElement.append("").append(author.getIdValue()).append(""); + } + } + + contributorsElement.append(""); + } + contributorsElement.append(""); + + } else { + contributorsElement.append("") + .append(AbstractGlobalIdServiceBean.UNAVAILABLE) + .append(""); + } + + datasetElement.append(contributorsElement); + + datasetElement.append("") + .append(this.title) + .append(""); + + datasetElement.append("") + .append(this.identifier) + .append("") + 
.append(this.baseUrl).append("/dataset.xhtml?persistentId=doi:").append(this.identifier) + .append(""); + + datasetElement.append(""); + xmlMetadata = xmlMetadata.replace("${datasets}", datasetElement.toString()); + return xmlMetadata; + } + + public static String getTemplate() { + return template; + } + + public static void setTemplate(String template) { + CrossRefMetadataTemplate.template = template; + } + + public String getIdentifier() { + return identifier; + } + + public String getDepositor() { + return depositor; + } + + public void setDepositor(String depositor) { + this.depositor = depositor; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getInstitution() { + return institution; + } + + public void setInstitution(String institution) { + this.institution = institution; + } + + public String getDepositorEmail() { + return depositorEmail; + } + + public void setDepositorEmail(String depositorEmail) { + this.depositorEmail = depositorEmail; + } + + public String getDatabaseTitle() { + return databaseTitle; + } + + public void setDatabaseTitle(String databaseTitle) { + this.databaseTitle = databaseTitle; + } +} + +class CrossRefFileUtil { + + public static void close(InputStream in) { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + throw new RuntimeException("Fail to close InputStream"); + } + } + } + + public static String readAndClose(InputStream inStream, String encoding) { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + byte[] buf = new byte[128]; + String data; + try { + int cnt; + while ((cnt = inStream.read(buf)) >= 0) { + outStream.write(buf, 0, cnt); + } + data = outStream.toString(encoding); + } catch (IOException ioe) { + throw new RuntimeException("IOException"); + } finally { + close(inStream); + } + return data; + } +} + diff --git a/src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java new file mode 100644 index 00000000000..50fafc5f036 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java @@ -0,0 +1,117 @@ +package edu.harvard.iq.dataverse; + +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +@Stateless +public class DOICrossRefServiceBean extends DOIServiceBean { + private static final Logger logger = Logger.getLogger(DOICrossRefServiceBean.class.getCanonicalName()); + + @EJB + DOICrossRefRegisterService doiCrossRefRegisterService; + + + @Override + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { + logger.info("CrossRef alreadyRegistered"); + if (pid == null || pid.asString().isEmpty()) { + logger.fine("No identifier sent."); + return false; + } + boolean alreadyExists; + String identifier = pid.asString(); + try { + alreadyExists = doiCrossRefRegisterService.testDOIExists(identifier); + } catch (Exception e) { + logger.log(Level.WARNING, "alreadyExists failed"); + return false; + } + return alreadyExists; + } + + @Override + public boolean registerWhenPublished() { + return true; + } + + @Override + public List getProviderInformation() { + return List.of("CrossRef", "https://status.crossref.org/"); + } + + @Override + protected String 
getProviderKeyName() { + return "CrossRef"; + } + + @Override + public String createIdentifier(DvObject dvObject) throws Throwable { + logger.info("CrossRef createIdentifier"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generateIdentifier(dvObject); + } + String identifier = getIdentifier(dvObject); + try { + String retString = doiCrossRefRegisterService.reserveIdentifier(identifier, dvObject); + logger.log(Level.FINE, "CrossRef create DOI identifier retString : " + retString); + return retString; + } catch (Exception e) { + logger.log(Level.WARNING, "CrossRef Identifier not created: create failed", e); + throw e; + } + } + + @Override + public Map getIdentifierMetadata(DvObject dvObject) { + logger.info("CrossRef getIdentifierMetadata"); + String identifier = getIdentifier(dvObject); + Map metadata = new HashMap<>(); + try { + metadata = doiCrossRefRegisterService.getMetadata(identifier); + } catch (Exception e) { + logger.log(Level.WARNING, "getIdentifierMetadata failed", e); + } + return metadata; + } + + @Override + public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { + logger.info("CrossRef modifyIdentifier"); + String identifier = getIdentifier(dvObject); + try { + doiCrossRefRegisterService.modifyIdentifier(identifier, dvObject); + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed", e); + throw e; + } + return identifier; + } + + @Override + public void deleteIdentifier(DvObject dvo) throws Exception { + logger.info("CrossRef deleteIdentifier"); + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + logger.info("CrossRef updateIdentifierStatus"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generateIdentifier(dvObject); + } + String identifier = getIdentifier(dvObject); + + try { + doiCrossRefRegisterService.reserveIdentifier(identifier, dvObject); + return true; + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e); + return false; + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index 5a689c06019..cfb17caf5a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -119,6 +119,9 @@ public class EjbDataverseEngine { @EJB DOIDataCiteServiceBean doiDataCite; + @EJB + DOICrossRefServiceBean doiCrossRef; + @EJB FakePidProviderServiceBean fakePidProvider; @@ -493,6 +496,11 @@ public DOIDataCiteServiceBean doiDataCite() { return doiDataCite; } + @Override + public DOICrossRefServiceBean doiCrossRef() { + return doiCrossRef; + } + @Override public FakePidProviderServiceBean fakePidProvider() { return fakePidProvider; diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java index aebf13778c3..ec96bcf603f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java @@ -204,8 +204,9 @@ class BeanDispatcher { switch ( doiProvider ) { case "EZID": return ctxt.doiEZId(); case "DataCite": return ctxt.doiDataCite(); + case "CrossRef": return ctxt.doiCrossRef(); case "FAKE": return ctxt.fakePidProvider(); - default: + default: logger.log(Level.SEVERE, "Unknown doiProvider: {0}", doiProvider); return null; } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java index f74c1222bb0..596a76be172 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java @@ -1,29 +1,9 @@ package edu.harvard.iq.dataverse.engine.command; -import edu.harvard.iq.dataverse.DOIDataCiteServiceBean; -import edu.harvard.iq.dataverse.DOIEZIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.DataverseFacetServiceBean; -import edu.harvard.iq.dataverse.DataverseFieldTypeInputLevelServiceBean; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.FeaturedDataverseServiceBean; -import edu.harvard.iq.dataverse.FileDownloadServiceBean; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.GuestbookServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.search.SearchServiceBean; -import edu.harvard.iq.dataverse.TemplateServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; @@ -104,6 +84,8 @@ public interface CommandContext { public DOIDataCiteServiceBean doiDataCite(); + public DOICrossRefServiceBean doiCrossRef(); + public FakePidProviderServiceBean fakePidProvider(); public HandlenetServiceBean handleNet(); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 3bc06738a7e..0600175deb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -104,7 +104,16 @@ public enum JvmSettings { DATACITE_REST_API_URL(SCOPE_PID_DATACITE, "rest-api-url", "doi.dataciterestapiurlstring", "doi.mdcbaseurlstring"), DATACITE_USERNAME(SCOPE_PID_DATACITE, "username", "doi.username"), DATACITE_PASSWORD(SCOPE_PID_DATACITE, "password", "doi.password"), - + + // PROVIDER CROSSREF + SCOPE_PID_CROSSREF(SCOPE_PID, "crossref"), + CROSSREF_URL(SCOPE_PID_CROSSREF, "url"), + CROSSREF_REST_API_URL(SCOPE_PID_CROSSREF, "rest-api-url"), + CROSSREF_USERNAME(SCOPE_PID_CROSSREF, "username", "doi.username"), + CROSSREF_PASSWORD(SCOPE_PID_CROSSREF, "password", "doi.password"), + CROSSREF_DEPOSITOR(SCOPE_PID_CROSSREF, "depositor"), + CROSSREF_DEPOSITOR_EMAIL(SCOPE_PID_CROSSREF, "depositor-email"), + // PROVIDER PERMALINK SCOPE_PID_PERMALINK(SCOPE_PID, "permalink"), PERMALINK_BASEURL(SCOPE_PID_PERMALINK, "base-url", "perma.baseurlstring"), diff --git 
a/src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml new file mode 100644 index 00000000000..f37ed63cfff --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml @@ -0,0 +1,29 @@ + + + + ${batchId} + ${timestamp} + + ${depositor} + ${depositorEmail} + + Crossref + + + + + + ${title} + + + ${institution} + + + ${datasets} + + + diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java index a80adb33b8d..15810f85d39 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java @@ -131,6 +131,11 @@ public DOIDataCiteServiceBean doiDataCite() { return null; } + @Override + public DOICrossRefServiceBean doiCrossRef() { + return null; + } + @Override public FakePidProviderServiceBean fakePidProvider() { return null; From a08fbe2b345dd9f10998b758eb962f899bd6653e Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 16:13:02 +0100 Subject: [PATCH 004/622] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index c36c7d1e963..a97adf4149b 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -286,5 +286,8 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl git push OdumInstitute 4709-postgresql_96 ---- +Develop branch and pull request +-------------------------------------------- +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `. Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 8277ac5adac8511efa9bb92d3f51dcf99973e67b Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 16:15:10 +0100 Subject: [PATCH 005/622] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index a97adf4149b..11ce0c0d144 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -286,8 +286,9 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl git push OdumInstitute 4709-postgresql_96 ---- + Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. 
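A condensed sketch of that first-pull-request scenario (the remote names, fork URL, and branch name are illustrative; the guide's own summary of Git commands is the authoritative reference):

    git clone git@github.com:YOUR_USERNAME/dataverse.git
    cd dataverse
    git checkout -b 9317-delete-saved-search develop   # branch off develop, named after the issue
    # ...commit your changes...
    git push origin 9317-delete-saved-search           # push the feature branch to your fork, not develop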
Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From be8c167a6454bc86535ce77e66dedb1d76ca0b08 Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 16:53:13 +0100 Subject: [PATCH 006/622] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 11ce0c0d144..5b809782f76 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -289,6 +289,6 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 8c5113b9c3f4e8380df45d91c84fdc3acdaf7f1a Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:04:48 +0100 Subject: [PATCH 007/622] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 5b809782f76..35e37ac0471 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -289,6 +289,6 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 1edb8bac22046dffd5c7c21197e013bb3be80a60 Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:07:58 +0100 Subject: [PATCH 008/622] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 35e37ac0471..99e33164417 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -289,6 +289,6 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. 
+Please do not use your forked develop branch to push a PR; instead, follow the `1st scenario: preparing the first pull request of Version Control Guide <#summary-of-git-commands>`_. Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 6df751346325948935582836a69b6e5378fb1b06 Mon Sep 17 00:00:00 2001 From: konradperlowski Date: Wed, 31 Jan 2024 10:31:03 +0100 Subject: [PATCH 009/622] Add CrossRef config properties description --- .../source/installation/config.rst | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index e7ba624222e..24f1d0cce12 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2370,6 +2370,50 @@ should delete the old JVM option and the wrapped password alias, then recreate as shown for :ref:`dataverse.pid.datacite.password` but with the EZID alias name. +.. _dataverse.pid.crossref.url: + +dataverse.pid.crossref.url +++++++++++++++++++++++++++ + +The CrossRef URL used to post metadata. + +.. _dataverse.pid.crossref.rest-api-url: + +dataverse.pid.crossref.rest-api-url ++++++++++++++++++++++++++++++++++++ + +The CrossRef REST API URL used to retrieve metadata information. + +.. _dataverse.pid.crossref.username: + +dataverse.pid.crossref.username ++++++++++++++++++++++++++++++++ + +CrossRef uses `HTTP Basic authentication `_ +for their APIs. +- Used in conjunction with :ref:`dataverse.pid.crossref.url` and :ref:`dataverse.pid.crossref.password`. + +.. _dataverse.pid.crossref.password: + +dataverse.pid.crossref.password ++++++++++++++++++++++++++++++++ + +- Used in conjunction with :ref:`dataverse.pid.crossref.url` and :ref:`dataverse.pid.crossref.username`. + +.. _dataverse.pid.crossref.depositor: + +dataverse.pid.crossref.depositor +++++++++++++++++++++++++++++++++ + +The entity, such as a person or organization, that deposited the Dataset in the repository. + +.. _dataverse.pid.crossref.depositor-email: + +dataverse.pid.crossref.depositor-email +++++++++++++++++++++++++++++++++++++++ + +Contact email address for the indicated depositor. + .. _dataverse.timerServer: dataverse.timerServer From a50f963931a671404ff5160b353aefe7d312afcb Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 31 Jan 2024 16:32:05 +0100 Subject: [PATCH 010/622] #9317 - Updated API Guide --- doc/sphinx-guides/source/api/native-api.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6591c983824..4f84c123109 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5131,8 +5131,7 @@ The ``$identifier`` should start with an ``@`` if it's a user. Groups start with Saved Search ~~~~~~~~~~~~ -The Saved Search, Linked Dataverses, and Linked Datasets features shipped with Dataverse 4.0, but as a "`superuser-only `_" because they are **experimental** (see `#1364 `_, `#1813 `_, `#1840 `_, `#1890 `_, `#1939 `_, `#2167 `_, `#2186 `_, `#2053 `_, and `#2543 `_). The following API endpoints were added to help people with access to the "admin" API make use of these features in their current form. There is a known issue (`#1364 `_) that once a link to a Dataverse collection or dataset is created, it cannot be removed (apart from database manipulation and reindexing) which is why a ``DELETE`` endpoint for saved searches is neither documented nor functional. 
The Linked Dataverse collections feature is `powered by Saved Search `_ and therefore requires that the "makelinks" endpoint be executed on a periodic basis as well. - +The Saved Search, Linked Dataverses, and Linked Datasets features are only accessible to superusers, except for linking a dataset. The following API endpoints were added to help people with access to the “admin” API make use of these features in their current form; keep in mind that they are partially experimental. List all saved searches. :: GET http://$SERVER/api/admin/savedsearches/list @@ -5141,6 +5140,10 @@ List a saved search by database id. :: GET http://$SERVER/api/admin/savedsearches/$id +Delete a saved search by database id. The ``unlink=true`` query parameter also removes the links (linked datasets or Dataverse collections) related to the deleted saved search. Use this parameter with care: you cannot know whether the saved search created those links or whether someone else did via another API. Also, depending on your needs, it may be followed by ``/makelinks/all`` (in case other saved searches could recreate some of the deleted links) or by reindexing the affected Dataverse collections or datasets. :: + + DELETE http://$SERVER/api/admin/savedsearches/$id?unlink=true + Execute a saved search by database id and make links to Dataverse collections and datasets that are found. The JSON response indicates which Dataverse collections and datasets were newly linked versus already linked. The ``debug=true`` query parameter adds to the JSON response extra information about the saved search being executed (which you could also get by listing the saved search). :: PUT http://$SERVER/api/admin/savedsearches/makelinks/$id?debug=true From c4b82dfb830ba014b79552830af86a9940648456 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 6 Feb 2024 15:44:21 +0100 Subject: [PATCH 011/622] fix(deps): resolve conflicting version of MIME4J #9077 - Apache Abdera Parser, Apache Tika and RESTeasy (Testing) use MIME4J - Tika and RESTeasy use newer APIs only present since v0.8+ - Abdera is an abandoned project, uses v0.7.2 and is hopefully compatible with newer releases - v0.8.4 given by Apache Tika relies on vulnerable Apache Commons IO 2.6; we want 2.11 per dependency management. Upgrading to v0.8.7 as the earliest version with the 2.11 dependency --- pom.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pom.xml b/pom.xml index f45e8fd9033..07d73336095 100644 --- a/pom.xml +++ b/pom.xml @@ -51,6 +51,17 @@ abdera-i18n 1.1.3 + <dependency> + <groupId>org.apache.james</groupId> + <artifactId>apache-mime4j-core</artifactId> + <version>0.8.7</version> + </dependency> + <dependency> + <groupId>org.apache.james</groupId> + <artifactId>apache-mime4j-dom</artifactId> + <version>0.8.7</version> + </dependency> + 1 ${packaging.type} Container Base Image This module provides an application server base image to be decorated with the Dataverse app. 
@@ -40,7 +42,7 @@ docker-build gdcc/base:${base.image.tag} - unstable + R${project.version} eclipse-temurin:${target.java.version}-jre 1000 1000 From 20c6a58915711efd9e18d8454813623ead9dbb9b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:17:47 +0200 Subject: [PATCH 022/622] ci(ct): add missing output definitions to base image workflow matrix discovery --- .github/workflows/container_base_push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index bfccefaf9eb..29abab230e3 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -27,6 +27,8 @@ jobs: packages: read # Only run in upstream repo - avoid unnecessary runs in forks and only for scheduled if: ${{ github.repository_owner == 'IQSS' }} + outputs: + branches: ${{ steps.matrix.outputs.branches }} steps: - name: Build branch matrix options id: matrix From e9236638eb2921b46318c3a4fb51e663ec021a56 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:20:41 +0200 Subject: [PATCH 023/622] test(ct): for testing, determine if matrix discovery works for schedule event --- .github/workflows/container_base_push.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 29abab230e3..ab91252efc6 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -34,11 +34,11 @@ jobs: id: matrix run: | # Get last three releases and include develop branch as matrix elements - if [[ "${{ github.event_name }}" == "schedule" ]]; then + #if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tee -a "$GITHUB_OUTPUT" - else - echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" - fi + #else + # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" + #fi build: name: Build image From def5d9e2842c09c3d647619f0cea266528c71a82 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:23:56 +0200 Subject: [PATCH 024/622] fix(ct): base image build matrix output must be on a single line Fix jq output by removing newlines and superfluous whitespace. 
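To illustrate the fix this commit message describes (the sample input and tag names are made up): by default ``jq`` pretty-prints its result across several lines, which breaks the single-line ``key=value`` records that ``$GITHUB_OUTPUT`` expects, so the output is flattened with ``tr``:

    $ echo '[{"tag_name":"v6.1"},{"tag_name":"v6.0"}]' | jq '[ .[0:3] | .[].tag_name, "develop" ]'
    [
      "v6.1",
      "v6.0",
      "develop"
    ]
    $ echo '[{"tag_name":"v6.1"},{"tag_name":"v6.0"}]' | jq '[ .[0:3] | .[].tag_name, "develop" ]' | tr -d "\n" | tr -s " "
    [ "v6.1", "v6.0", "develop"]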
--- .github/workflows/container_base_push.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index ab91252efc6..705b152d1bc 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -35,7 +35,9 @@ jobs: run: | # Get last three releases and include develop branch as matrix elements #if [[ "${{ github.event_name }}" == "schedule" ]]; then - echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tee -a "$GITHUB_OUTPUT" + echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ + jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tr -d "\n" | tr -s " " | \ + tee -a "$GITHUB_OUTPUT" #else # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" #fi From b028c2610db5688bf443f436e1bbdb1c7c8fcb6c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:27:06 +0200 Subject: [PATCH 025/622] fix(ct): do not stop building other base images if one build fails --- .github/workflows/container_base_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 705b152d1bc..f36c5a8de2d 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -50,6 +50,7 @@ jobs: packages: read needs: discover strategy: + fail-fast: false matrix: branch: ${{ fromJson(needs.discover.outputs.branches) }} # Only run in upstream repo - avoid unnecessary runs in forks From b453a12b780c4149a3ca2ffb5469d6cfb85c5290 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:45:07 +0200 Subject: [PATCH 026/622] fix(ct): avoid costly lookup of Java version to be used via Maven, grep from XML instead --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index f36c5a8de2d..d6af36702f6 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -61,9 +61,9 @@ jobs: uses: actions/checkout@v4 with: ref: ${{ matrix.branch }} - - name: Determine Java version from package + - name: Determine Java version from Parent POM run: | - echo "JAVA_VERSION=$(mvn -f modules/container-base -Pct help:evaluate -Dexpression=target.java.version -q -DforceStdout)" >> ${GITHUB_ENV} + echo "JAVA_VERSION=$(grep '<target.java.version>' modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" >> ${GITHUB_ENV} - name: Set up JDK ${{ env.JAVA_VERSION }} uses: actions/setup-java@v4 From cb2fac02fb9370f565ef613c0f2e57f5ce7957a0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:48:19 +0200 Subject: [PATCH 027/622] test(ct): no longer stop before Docker, but tell DMP to skip pushing --- .github/workflows/container_base_push.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index d6af36702f6..24b2640f3e3 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -74,12 +74,6 @@ jobs: cache-dependency-path: | modules/container-base/pom.xml - - name: Stop on purpose for testing - uses: actions/github-script@v3 - with: - script: | - core.setFailed('Stopped on purpose.') - # Note: Accessing, pushing tags 
etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! @@ -91,7 +85,7 @@ jobs: - name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Deploy multi-arch base container image to Docker Hub - run: mvn -f modules/container-base -Pct deploy -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From afa51c78327748d14a1e9bb6f1d6eac78e357f48 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:12:13 +0200 Subject: [PATCH 028/622] fix(ct): skip building and pushing the base image for v6.0 and v6.1, which is compatible with the v6.2 image --- .github/workflows/container_base_push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 24b2640f3e3..cb83a2799bb 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -85,6 +85,8 @@ jobs: - name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Deploy multi-arch base container image to Docker Hub + # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. + if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} From 8afac921361fcb76752c9425f0ce2e2a5708244e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:12:47 +0200 Subject: [PATCH 029/622] test(ct): try to limit QEMU architectures, maybe shaving off a few seconds of setup --- .github/workflows/container_base_push.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index cb83a2799bb..ed9e2a1cc1e 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -83,7 +83,10 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 + with: + platforms: ${{ env.PLATFORMS }} + - name: Deploy multi-arch base container image to Docker Hub # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. 
if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} From 7c4242ea384e06b70664bffde4c5d1828ac70b1a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:18:27 +0200 Subject: [PATCH 030/622] feat(ct): add additional tags for base images --- .github/workflows/container_base_push.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index ed9e2a1cc1e..21a15e02c1d 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -86,11 +86,20 @@ jobs: uses: docker/setup-qemu-action@v3 with: platforms: ${{ env.PLATFORMS }} + - name: Add additional tags as options + run: | + # For the development branch, update the latest tag in addition + if [[ "${{ matrix.branch }}" == "develop" ]]; then + echo "DOCKER_TAGS=-Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" + # In case of releases <=6.2, we still need to provide backward compatible names "alpha" and "unstable" + elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then + echo "DOCKER_TAGS=-Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" + fi - name: Deploy multi-arch base container image to Docker Hub # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} - run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From c08220dd4e40895973206d178f9cdeb7c7b5bf1c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:35:45 +0200 Subject: [PATCH 031/622] test(ct): add feature branch to matrix and build for everything but it. --- .github/workflows/container_base_push.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 21a15e02c1d..af22b6b1a55 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -32,11 +32,12 @@ jobs: steps: - name: Build branch matrix options id: matrix + # TODO: remove the feature branch and re-enable the if/else! run: | # Get last three releases and include develop branch as matrix elements #if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ - jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tr -d "\n" | tr -s " " | \ + jq '[ .[0:3] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ tee -a "$GITHUB_OUTPUT" #else # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" @@ -87,19 +88,28 @@ jobs: with: platforms: ${{ env.PLATFORMS }} - name: Add additional tags as options + # TODO: remove the feature branch and re-enable the if/else! 
run: | # For the development branch, update the latest tag in addition - if [[ "${{ matrix.branch }}" == "develop" ]]; then + if [[ "${{ matrix.branch }}" == "develop" || "${{ matrix.branch }}" == "10478-version-base-img" ]]; then echo "DOCKER_TAGS=-Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" # In case of releases <=6.2, we still need to provide backward compatible names "alpha" and "unstable" elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then echo "DOCKER_TAGS=-Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi + # TODO: remove when feature branch is done + - name: Skip all but feature-branch + if: ${{ matrix.branch != '10478-version-base-img' }} + uses: actions/github-script@v3 + with: + script: | + core.setFailed('Stopping on purpose to avoid mayhem') + - name: Deploy multi-arch base container image to Docker Hub # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} - run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From c4cbc576ef3dba974e8bffa338ad7a74d3962960 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:56:10 +0200 Subject: [PATCH 032/622] chore,test(ct): update script action to v7 --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index af22b6b1a55..f7f8e889d56 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -100,7 +100,7 @@ jobs: # TODO: remove when feature branch is done - name: Skip all but feature-branch if: ${{ matrix.branch != '10478-version-base-img' }} - uses: actions/github-script@v3 + uses: actions/github-script@v7 with: script: | core.setFailed('Stopping on purpose to avoid mayhem') From 05e48875387f769fb9cc966868699cbd4edce8c1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:56:44 +0200 Subject: [PATCH 033/622] fix(ct): add missing option for DMP to add additional tags to base image --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index f7f8e889d56..cefa63786c9 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -92,10 +92,10 @@ jobs: run: | # For the development branch, update the latest tag in addition if [[ "${{ matrix.branch }}" == "develop" || "${{ matrix.branch }}" == "10478-version-base-img" ]]; then - echo "DOCKER_TAGS=-Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" + echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" # In case of releases <=6.2, we still need to provide backward compatible names "alpha" and "unstable" elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then - echo "DOCKER_TAGS=-Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" + echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi # TODO: remove when feature branch is done - name: Skip all but feature-branch From 
be5b9c98cbcab64085ec8a5ffc5b8275a4fec335 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:09:53 +0200 Subject: [PATCH 034/622] feat(ct): trigger build of app image when all builds of base image succeed We use the branch name from the matrix as input argument. It's not being used yet. --- .github/workflows/container_app_push.yml | 5 +++++ .github/workflows/container_base_push.yml | 22 ++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index b3e247e376c..fba693eee05 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -5,6 +5,11 @@ on: # We are deliberately *not* running on push events here to avoid double runs. # Instead, push events will trigger from the base image and maven unit tests via workflow_call. workflow_call: + inputs: + branch: + type: string + description: "A tag or branch to checkout for building the image" + required: true pull_request: branches: - develop diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index cefa63786c9..4e80caa8586 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -107,6 +107,7 @@ jobs: - name: Deploy multi-arch base container image to Docker Hub + id: build # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} run: mvn -f modules/container-base -Pct deploy ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} @@ -120,14 +121,27 @@ jobs: repository: gdcc/base short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" readme-filepath: ./modules/container-base/README.md + + # - if: always() + # name: Save status (workaround for matrix outputs) + # run: | + # # steps.build.outcome is the status BEFORE continue-on-error + # echo "STATUS_$( echo "${{ matrix.branch }}" | tr ".:;,-/ " "_" )=${{ steps.build.outcome }}" | tee -a "${GITHUB_ENV}" + push-app-img: name: "Rebase & Publish App Image" permissions: contents: read packages: write pull-requests: write - needs: build - # We do not release a new base image for pull requests, so do not trigger. 
- # if: ${{ github.event_name != 'pull_request' }} + needs: + - discover + - build + strategy: + fail-fast: false + matrix: + branch: ${{ fromJson(needs.discover.outputs.branches) }} uses: ./.github/workflows/container_app_push.yml - secrets: inherit + with: + branch: ${{ matrix.branch }} + From d4a196b8a6247542051bf3d0daf24789121d6b94 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:12:00 +0200 Subject: [PATCH 035/622] test(ct): let's skip the build for anything but our feature branch --- .github/workflows/container_app_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index fba693eee05..a92ca4aecbd 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -35,7 +35,7 @@ jobs: packages: write pull-requests: write # Only run in upstream repo - avoid unnecessary runs in forks - if: ${{ github.repository_owner == 'IQSS' }} + if: ${{ github.repository_owner == 'IQSS' && inputs.branch == '10478-version-base-img' }} steps: - name: Checkout repository From 77aec0d62dcb0aad1e3e1bf7abdf740fc7bed540 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:19:35 +0200 Subject: [PATCH 036/622] style(ct): fix wording to test CI pipeline --- modules/container-base/src/main/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index f093ced37c1..0905ebb62a1 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -221,7 +221,7 @@ RUN < Date: Thu, 11 Apr 2024 14:22:08 +0200 Subject: [PATCH 037/622] test(ct): try what happens when removing the on purpose failure of base image workflow --- .github/workflows/container_base_push.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 4e80caa8586..8c15c0a7ff7 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -97,13 +97,14 @@ jobs: elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi + # TODO: remove when feature branch is done - - name: Skip all but feature-branch - if: ${{ matrix.branch != '10478-version-base-img' }} - uses: actions/github-script@v7 - with: - script: | - core.setFailed('Stopping on purpose to avoid mayhem') + #- name: Skip all but feature-branch + # if: ${{ matrix.branch != '10478-version-base-img' }} + # uses: actions/github-script@v7 + # with: + # script: | + # core.setFailed('Stopping on purpose to avoid mayhem') - name: Deploy multi-arch base container image to Docker Hub From 2887d13ca93103420ad78973e4e975dd0da90b41 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:42:40 +0200 Subject: [PATCH 038/622] fix(ct): make secrets available to app workflow again Deleted by accident when working on alternativ matrix way to build app image after base. 
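For context: a job that delegates to a reusable workflow via `uses:` does not hand its secrets to the called workflow by default; they must be forwarded explicitly or wholesale. A minimal sketch of the job shape this patch restores (values shortened for illustration):

    push-app-img:
      uses: ./.github/workflows/container_app_push.yml
      secrets: inherit                  # forward all of the caller's secrets to the called workflow
      with:
        branch: ${{ matrix.branch }}    # input consumed by the called workflow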
--- .github/workflows/container_base_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 8c15c0a7ff7..1631c08f10e 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -135,6 +135,7 @@ jobs: contents: read packages: write pull-requests: write + secrets: inherit needs: - discover - build From bbe527d4646b09fd41fa4e57c52e2759183d2042 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:44:06 +0200 Subject: [PATCH 039/622] chore(ct,build): upgrade to DMP plugin v0.44.0 - Enable -Ddocker.noCache for BuildX - Hopefully straightens out some bugs on Darwin with M1/M2 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 612902b47a4..d03d3e242fc 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -199,7 +199,7 @@ 1.7.0 - 0.43.4 + 0.44.0 From c496ef31304f8ced339aace0b903a52d69991b71 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:45:35 +0200 Subject: [PATCH 040/622] feat(ct): pin down name of the builder for base image This way we have a distinct path and name to inject a BuildX builder instance configuration --- modules/container-base/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 2754b081986..72811e34e84 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -100,6 +100,7 @@ ${docker.platforms} + dataverse ${project.build.directory}/buildx-state Dockerfile From 82c8e725365c72bbc4a2a906779bc1c61a71ff89 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:46:05 +0200 Subject: [PATCH 041/622] ci(ct): make number of past release configurable via env var --- .github/workflows/container_base_push.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 1631c08f10e..fe3b38d0284 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -17,6 +17,7 @@ on: env: PLATFORMS: linux/amd64,linux/arm64 + NUM_PAST_RELEASES: 3 jobs: discover: @@ -37,7 +38,7 @@ jobs: # Get last three releases and include develop branch as matrix elements #if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ - jq '[ .[0:3] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ + jq '[ .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ tee -a "$GITHUB_OUTPUT" #else # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" From 0c91541f5c11c3daead61f657dbdee6af4addc43 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:54:49 +0200 Subject: [PATCH 042/622] fix(ct): correction of tags specification for base image workflow Per example at https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-only-when-a-push-of-specific-tags-occurs it should be two asterisks. 
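For reference, in GitHub Actions filter patterns a single `*` matches any character except `/`, while `**` also matches across `/`, so the doubled form covers every tag name beginning with the version prefix. A minimal excerpt of the resulting trigger (not the full workflow):

    on:
      push:
        tags:
          - 'v[6-9].**'   # e.g. v6.2, v6.2.1, and tags with further suffixes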
--- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index fe3b38d0284..838321b10f6 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -4,7 +4,7 @@ name: Base Container Image on: push: tags: - - 'v[6-9].*' + - 'v[6-9].**' branches: - 'develop' - '10478-version-base-img' From eef60f295ced438a7c3e91fbef61edc273ec51d9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:56:21 +0200 Subject: [PATCH 043/622] refactor(ct): when pushes for a tag or branch occur, build for it Before, we would have run the workflow for develop only. In case develop has merges before the run starts because of a tag being created, this may result in undesired side effects. Keep in mind that pushes of tags here will also trigger releasing a new application image! --- .github/workflows/container_base_push.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 838321b10f6..ec8881b3ef5 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -41,7 +41,8 @@ jobs: jq '[ .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ tee -a "$GITHUB_OUTPUT" #else - # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" + # # Note: github.ref_name will be the name of the branch or the tag pushed + # echo "branches=['${{ github.ref_name }}']" | tee -a "$GITHUB_OUTPUT" #fi build: From 90cb9ce129ff1b3b9de9e0585232418b820d7dbb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 09:59:01 +0200 Subject: [PATCH 044/622] feat(ct): replace QEMU with remote builder for ARM64 in base image --- .github/workflows/container_base_push.yml | 35 +++++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index ec8881b3ef5..324f8134a64 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -85,10 +85,39 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + + # This is replaced by adding a remote ARM64 build host in addition to the local AMD64 + # - name: Set up QEMU for multi-arch builds + # uses: docker/setup-qemu-action@v3 + # with: + # platforms: ${{ env.PLATFORMS }} + # Setup SSH access to ARM64 builder node + - name: Setup SSH agent + uses: webfactory/ssh-agent@v0.9.0 with: - platforms: ${{ env.PLATFORMS }} + ssh-private-key: ${{ secrets.BUILDER_ARM64_SSH_PRIVATE_KEY }} + - name: Provide the known hosts key and the builder config + run: | + echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts + mkdir -p modules/container-base/target/buildx-state/buildx/instances + cat modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF + { "Name": "dataverse", + "Driver": "docker-container", + "Dynamic": false, + "Nodes": [{"Name": "dataverse0", + "Endpoint": "unix:///var/run/docker.sock", + "Platforms": [{"os": "linux", "architecture": "amd64"}], + "DriverOpts": null, + "Flags": ["--allow-insecure-entitlement=network.host"], + "Files": null}, + {"Name": "dataverse1", + "Endpoint": 
"ssh://${{ secret.BUILDER_ARM64_SSH_CONNECTION }}", + "Platforms": [{"os": "linux", "architecture": "arm64"}], + "DriverOpts": null, + "Flags": ["--allow-insecure-entitlement=network.host"], + "Files": null}]} + EOF + - name: Add additional tags as options # TODO: remove the feature branch and re-enable the if/else! run: | From d7fbf871cffe8685e75533280e4ee2fa1f1add60 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 10:00:17 +0200 Subject: [PATCH 045/622] feat(ct): disable caching during base image build The ARM64 remote build host will otherwise reuse the cache with potentially stale content --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 324f8134a64..aaf20397f9d 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -142,7 +142,7 @@ jobs: id: build # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} - run: mvn -f modules/container-base -Pct deploy ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy -Ddocker.noCache ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From b6fb1224b9ca6fd0f345efc6ad73ebddea962450 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 10:02:31 +0200 Subject: [PATCH 046/622] style(ct): fix typo in secrets reference --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index aaf20397f9d..9243c2725e1 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -111,7 +111,7 @@ jobs: "Flags": ["--allow-insecure-entitlement=network.host"], "Files": null}, {"Name": "dataverse1", - "Endpoint": "ssh://${{ secret.BUILDER_ARM64_SSH_CONNECTION }}", + "Endpoint": "ssh://${{ secrets.BUILDER_ARM64_SSH_CONNECTION }}", "Platforms": [{"os": "linux", "architecture": "arm64"}], "DriverOpts": null, "Flags": ["--allow-insecure-entitlement=network.host"], From b3ac714f98971ef21cd47650e04a370226334fc8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 10:04:08 +0200 Subject: [PATCH 047/622] fix(ct): add missing pipe redirection for builder config --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 9243c2725e1..69cd31afcf4 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -100,7 +100,7 @@ jobs: run: | echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts mkdir -p modules/container-base/target/buildx-state/buildx/instances - cat modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF + cat > modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF { "Name": "dataverse", "Driver": "docker-container", "Dynamic": false, From 898d9053becaa64151c06ec645080f6544292776 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 11:06:32 +0200 Subject: [PATCH 048/622] refactor(ct): switch back to "maven" as builder name Using "maven" (the 
default name) again allows us to use the ARM64 builder for previously released images as well as develop before we merge the feature branch --- .github/workflows/container_base_push.yml | 8 ++++---- modules/container-base/pom.xml | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 69cd31afcf4..e4733cca027 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -100,17 +100,17 @@ jobs: run: | echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts mkdir -p modules/container-base/target/buildx-state/buildx/instances - cat > modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF - { "Name": "dataverse", + cat > modules/container-base/target/buildx-state/buildx/instances/maven << EOF + { "Name": "maven", "Driver": "docker-container", "Dynamic": false, - "Nodes": [{"Name": "dataverse0", + "Nodes": [{"Name": "maven0", "Endpoint": "unix:///var/run/docker.sock", "Platforms": [{"os": "linux", "architecture": "amd64"}], "DriverOpts": null, "Flags": ["--allow-insecure-entitlement=network.host"], "Files": null}, - {"Name": "dataverse1", + {"Name": "maven1", "Endpoint": "ssh://${{ secrets.BUILDER_ARM64_SSH_CONNECTION }}", "Platforms": [{"os": "linux", "architecture": "arm64"}], "DriverOpts": null, diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 72811e34e84..2754b081986 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -100,7 +100,6 @@ ${docker.platforms} - dataverse ${project.build.directory}/buildx-state Dockerfile From 577804b3776847131b6e5f05727c9f0f69232d63 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 13:02:15 +0200 Subject: [PATCH 049/622] refactor(ct): use remote ARM64 builder only for pushes, but not scheduled maintenance --- .github/workflows/container_base_push.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index e4733cca027..36741ccd211 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -86,17 +86,21 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - # This is replaced by adding a remote ARM64 build host in addition to the local AMD64 - # - name: Set up QEMU for multi-arch builds - # uses: docker/setup-qemu-action@v3 - # with: - # platforms: ${{ env.PLATFORMS }} - # Setup SSH access to ARM64 builder node + # In case of scheduled maintenance, we don't care about buildtime: use QEMU for AMD64 + ARM64 + - name: Set up QEMU for multi-arch builds + if: ${{ github.event_name == 'schedule' }} + uses: docker/setup-qemu-action@v3 + with: + platforms: ${{ env.PLATFORMS }} + # In case this is a push to develop, we care about buildtime. + # Configure a remote ARM64 build host in addition to the local AMD64 in two steps. 
- name: Setup SSH agent + if: ${{ github.event_name != 'schedule' }} uses: webfactory/ssh-agent@v0.9.0 with: ssh-private-key: ${{ secrets.BUILDER_ARM64_SSH_PRIVATE_KEY }} - name: Provide the known hosts key and the builder config + if: ${{ github.event_name != 'schedule' }} run: | echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts mkdir -p modules/container-base/target/buildx-state/buildx/instances From 25cbdd7b19478f9110ceb55436f9f84bcbb3aff5 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 14:19:35 +0200 Subject: [PATCH 050/622] test(ct): use DMP v0.45-SNAPSHOT to see if pushing images that have a ARG in their ref works with it --- modules/dataverse-parent/pom.xml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index d03d3e242fc..473c143296a 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -199,7 +199,7 @@ 1.7.0 - 0.44.0 + 0.45-SNAPSHOT @@ -215,6 +215,14 @@ never + + oss.sonatype.org + https://oss.sonatype.org/content/repositories/snapshots + + true + always + + From 2de674f6d80249ea2e446275533f4e3b6daa630f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:23:21 +0200 Subject: [PATCH 051/622] feat(build,ci): workflow pre-seeding Maven cache from develop #10428 Contains the feature branch for now, too. Will be deleted later before merging it to develop. --- .github/workflows/maven_cache_seed.yaml | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/maven_cache_seed.yaml diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml new file mode 100644 index 00000000000..e82ce36f829 --- /dev/null +++ b/.github/workflows/maven_cache_seed.yaml @@ -0,0 +1,37 @@ +name: Maven Cache Seeding + +on: + push: + branches: + - develop + - 10478-version-base-img + +env: + COMMON_CACHE_NAME: dataverse-maven-cache + +jobs: + preseed: + name: Drop and Re-Seed Local Repository + runs-on: ubuntu-latest + if: ${{ github.event_name == 'push' }} + steps: + - name: Drop common cache + uses: prantlf/delete-cache-action@v2 + with: + key: ${{ env.COMMON_CACHE_NAME }} + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: "17" + distribution: temurin + - name: Re-Seed common cache + run: | + mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins + - name: Save the cache + uses: actions/cache/save@v4 + with: + path: ~/.m2/repository + key: ${{ env.COMMON_CACHE_NAME }} + From b1943c114110405a613c4093fb2d7eee5fa904d7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:35:38 +0200 Subject: [PATCH 052/622] fix(build,ci): ignore not found when deleting common cache --- .github/workflows/maven_cache_seed.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index e82ce36f829..4650cfcf930 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -16,6 +16,7 @@ jobs: if: ${{ github.event_name == 'push' }} steps: - name: Drop common cache + continue-on-error: true # we don't care if the cache is not around uses: prantlf/delete-cache-action@v2 with: key: ${{ env.COMMON_CACHE_NAME }} From 941fbc904d5700cebcff9d64b0a8a1f1c2268d36 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:51:33 
+0200 Subject: [PATCH 053/622] style(ct): remove some stale comments --- .github/workflows/container_base_push.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 36741ccd211..7f2f194cbcd 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -123,7 +123,7 @@ jobs: EOF - name: Add additional tags as options - # TODO: remove the feature branch and re-enable the if/else! + # TODO: remove the feature branch run: | # For the development branch, update the latest tag in addition if [[ "${{ matrix.branch }}" == "develop" || "${{ matrix.branch }}" == "10478-version-base-img" ]]; then @@ -133,15 +133,6 @@ jobs: echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi - # TODO: remove when feature branch is done - #- name: Skip all but feature-branch - # if: ${{ matrix.branch != '10478-version-base-img' }} - # uses: actions/github-script@v7 - # with: - # script: | - # core.setFailed('Stopping on purpose to avoid mayhem') - - - name: Deploy multi-arch base container image to Docker Hub id: build # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. From b64b6aff9ce38009fdf90c451493b257690472f7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:52:28 +0200 Subject: [PATCH 054/622] feat(ct): make use of common cache in app container builds #10428 --- .github/workflows/container_app_push.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index a92ca4aecbd..347fcf06b86 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -42,11 +42,18 @@ jobs: uses: actions/checkout@v3 - name: Set up JDK + id: setup-java uses: actions/setup-java@v3 with: java-version: "17" distribution: temurin cache: maven + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache + - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests) run: > @@ -111,11 +118,19 @@ jobs: if: needs.check-secrets.outputs.available == 'true' && ( github.event_name != 'push' || ( github.event_name == 'push' && contains(fromJSON('["develop", "master"]'), github.ref_name))) steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 + - uses: actions/checkout@v4 + - name: Set up JDK + id: setup-java + uses: actions/setup-java@v3 with: java-version: "17" distribution: temurin + cache: maven + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache # Depending on context, we push to different targets. Login accordingly. 
- if: github.event_name != 'pull_request' From 4f48123f73674eb56f3f687cc2e7df55908e13db Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:52:49 +0200 Subject: [PATCH 055/622] feat(ci): remove feature branch from maven seed workflow --- .github/workflows/maven_cache_seed.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 4650cfcf930..d002a446b99 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,7 +4,6 @@ on: push: branches: - develop - - 10478-version-base-img env: COMMON_CACHE_NAME: dataverse-maven-cache From 721814a62672a8483d1fd0708071444bc239e806 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:01:47 +0200 Subject: [PATCH 056/622] style(ct): rename base image revision from RX to revX It's just much more readable --- modules/container-base/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 2754b081986..e5de8841530 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -42,7 +42,7 @@ docker-build gdcc/base:${base.image.tag} - R${project.version} + rev${project.version} eclipse-temurin:${target.java.version}-jre 1000 1000 From 3c0650f6710bcda94c0e83278654a454466b5a41 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:02:12 +0200 Subject: [PATCH 057/622] feat(ct): make dataverse app image use base rev1 image --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8f9d06b8744..fa267b6fbee 100644 --- a/pom.xml +++ b/pom.xml @@ -956,7 +956,8 @@ unstable false gdcc/base:${base.image.tag} - unstable + + rev1 gdcc/configbaker:${conf.image.tag} ${app.image.tag} From 06a21612347e0a64cfe0adfa05d1823c4927f5b2 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:15:00 +0200 Subject: [PATCH 058/622] refactor(ci): delete common cache after we downloaded the old one #10428 --- .github/workflows/maven_cache_seed.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index d002a446b99..5a38abed767 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -14,11 +14,6 @@ jobs: runs-on: ubuntu-latest if: ${{ github.event_name == 'push' }} steps: - - name: Drop common cache - continue-on-error: true # we don't care if the cache is not around - uses: prantlf/delete-cache-action@v2 - with: - key: ${{ env.COMMON_CACHE_NAME }} - name: Checkout repository uses: actions/checkout@v4 - name: Set up JDK @@ -26,9 +21,16 @@ jobs: with: java-version: "17" distribution: temurin - - name: Re-Seed common cache + - name: Seed common cache run: | mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins + # This non-obvious order is due to the fact that the download via Maven will take a very long time. + # Jobs should not be left without a cache. Deleting and saving in one go leaves only a small chance for a cache miss. 
+ - name: Drop common cache + continue-on-error: true # we don't care if the cache is not around + uses: prantlf/delete-cache-action@v2 + with: + key: ${{ env.COMMON_CACHE_NAME }} - name: Save the cache uses: actions/cache/save@v4 with: From 818bfd4ec32fa0d67f39611f6f3235af730fbfb9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:15:20 +0200 Subject: [PATCH 059/622] test(ci): readd feature branch to test cache handling logic --- .github/workflows/maven_cache_seed.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 5a38abed767..f4b13725e70 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,6 +4,7 @@ on: push: branches: - develop + - 10478-version-base-img env: COMMON_CACHE_NAME: dataverse-maven-cache From a28997c815685b0527e3250c7526d9a03521ddae Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:16:33 +0200 Subject: [PATCH 060/622] style(ct): add common cache download to base image Without accurate measuring it seems like we use ~1 minute to download Maven artifacts for this workflow. Lets try to cut that down some by reusing the common cache. --- .github/workflows/container_base_push.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 7f2f194cbcd..667808d4d08 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -69,6 +69,7 @@ jobs: echo "JAVA_VERSION=$(grep '' modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" >> ${GITHUB_ENV} - name: Set up JDK ${{ env.JAVA_VERSION }} + id: setup-java uses: actions/setup-java@v4 with: java-version: ${{ env.JAVA_VERSION }} @@ -76,6 +77,12 @@ jobs: cache: 'maven' cache-dependency-path: | modules/container-base/pom.xml + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache + path: ~/.m2/repository # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! From 085e41e9c1bd7c550e76ad47b5450ac5fe197450 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:19:59 +0200 Subject: [PATCH 061/622] style(ct): remove stale comment about platforms empty in base image --- modules/container-base/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index e5de8841530..60be420bd9a 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -97,7 +97,6 @@ - ${docker.platforms} ${project.build.directory}/buildx-state From 2fe0ca39ac5c9cf044ad833b7e66de449d61efa3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:52:12 +0200 Subject: [PATCH 062/622] fix(build,ci): downgrade to cache action v2 Per https://github.com/actions/cache/issues/1361 restores in v3 and v4 were not working, maybe saving is botched, too? 
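For context, the combined `actions/cache` action restores a matching cache when the job starts and saves it again in an automatic post step, whereas the newer `actions/cache/restore` and `actions/cache/save` sub-actions each perform only one half. A minimal sketch of the combined form this patch falls back to, reusing the key from this workflow:

    - uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: dataverse-maven-cache   # the COMMON_CACHE_NAME used above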
--- .github/workflows/maven_cache_seed.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index f4b13725e70..148808a4b04 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -33,7 +33,7 @@ jobs: with: key: ${{ env.COMMON_CACHE_NAME }} - name: Save the cache - uses: actions/cache/save@v4 + uses: actions/cache@v2 with: path: ~/.m2/repository key: ${{ env.COMMON_CACHE_NAME }} From afad97e4160d60e31b01611e8a1e28ad0192631f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:09:50 +0200 Subject: [PATCH 063/622] fix(build,ci,ct): fix missing path for cache restore --- .github/workflows/container_app_push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index 347fcf06b86..3344a8e2d0d 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -52,6 +52,7 @@ jobs: if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} uses: actions/cache/restore@v4 with: + path: ~/.m2/repository key: dataverse-maven-cache @@ -130,6 +131,7 @@ jobs: if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} uses: actions/cache/restore@v4 with: + path: ~/.m2/repository key: dataverse-maven-cache # Depending on context, we push to different targets. Login accordingly. From 1c1c30e8b1d11f830354535f5164a92112eb3ca3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:11:51 +0200 Subject: [PATCH 064/622] test(ci): fix a comment to trigger base image workflow --- modules/container-base/src/main/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 0905ebb62a1..a8bd3a32ca3 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -203,7 +203,7 @@ RUN < Date: Fri, 12 Apr 2024 17:17:49 +0200 Subject: [PATCH 065/622] style(ci): remove feature branch from maven cache seeding again --- .github/workflows/maven_cache_seed.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 148808a4b04..85f1381c789 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,7 +4,6 @@ on: push: branches: - develop - - 10478-version-base-img env: COMMON_CACHE_NAME: dataverse-maven-cache From 43402435ae4d619b872ac9bd14704f54306fc49e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:20:08 +0200 Subject: [PATCH 066/622] doc(ci): add note about cache availability to seeding workflow --- .github/workflows/maven_cache_seed.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 85f1381c789..e11d8c91c0e 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -31,6 +31,8 @@ jobs: uses: prantlf/delete-cache-action@v2 with: key: ${{ env.COMMON_CACHE_NAME }} + # NOTE: It is vital here to remember that only caches with the scope of the default branch are + # available to other branches. We use the v2 action here to save it anyway. 
- name: Save the cache uses: actions/cache@v2 with: From 899e8aa6548d8b6076c266807cc265d4c0bb049b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:31:16 +0200 Subject: [PATCH 067/622] feat(ci): make sure to rejuvenate the common cache every 7 days --- .github/workflows/maven_cache_seed.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index e11d8c91c0e..d31559138b8 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,6 +4,10 @@ on: push: branches: - develop + # According to https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy + # all caches are depleted after 7 days of no access. Make sure we rejuvenate every 7 days to keep it available. + schedule: + - cron: '23 2 * * 0' # Run for 'develop' every Sunday at 02:23 UTC env: COMMON_CACHE_NAME: dataverse-maven-cache @@ -12,7 +16,6 @@ jobs: preseed: name: Drop and Re-Seed Local Repository runs-on: ubuntu-latest - if: ${{ github.event_name == 'push' }} steps: - name: Checkout repository uses: actions/checkout@v4 From f538896971022cc4fa011a38aca41936125a30dc Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:14:56 +0200 Subject: [PATCH 068/622] chore(ct): replace wait-for with wait4x Aligning configbaker and base image with same tool. wait4x has much more features to wait for different services. --- doc/sphinx-guides/source/container/base-image.rst | 2 +- modules/container-base/src/main/docker/Dockerfile | 12 ++++++------ .../docker/scripts/init_3_wait_dataverse_db_host.sh | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index c41250d48c5..29c357b91f6 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -46,7 +46,7 @@ The base image provides: - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on - `Jattach `__ (attach to running JVM) -- `wait-for `__ (tool to "wait for" a service to be available) +- `wait4x `__ (tool to "wait for" a service to be available) - `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", see :ref:`below `. 
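For illustration, wait4x can wait on plain TCP sockets as well as higher-level protocols, which is what makes it a drop-in replacement for wait-for in the init scripts. A rough sketch of such a call (host, port, and timeout are illustrative, not taken from the patch):

    # block until the database port accepts connections, for at most two minutes
    wait4x tcp "${DATAVERSE_DB_HOST}:5432" --timeout 120s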
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index a8bd3a32ca3..e3fa6e477a9 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -92,8 +92,7 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG WAIT_FOR_VERSION="v2.2.3" -ARG WAIT_FOR_CHECKSUM="70271181be69cd2c7265b2746f97fccfd7e8aa1059894138a775369c23589ff4" +ARG WAIT4X_VERSION="v2.14.0" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching @@ -108,10 +107,11 @@ RUN < Date: Mon, 15 Apr 2024 14:17:55 +0200 Subject: [PATCH 069/622] build(ct): make target architecture available in base image build As per https://docs.docker.com/reference/dockerfile/#automatic-platform-args-in-the-global-scope BuildKit / buildx will expose the target architecture. It requires adding an ARG in the Dockerfile to inject the data. --- modules/container-base/src/main/docker/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index e3fa6e477a9..256d9159d2d 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -69,6 +69,9 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ ### PART 1: SYSTEM ### ARG UID=1000 ARG GID=1000 +# Auto-populated by BuildKit / buildx +#ARG TARGETARCH="amd64" +ARG TARGETARCH USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] From e86133468a0086f0701efc69ad1549e21102f284 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:20:18 +0200 Subject: [PATCH 070/622] chore(ct): upgrade base image with jattach v2.2 jattach binary is now available for ARM64 and AMD64, but requires special handling with download URLs and checksums. --- .../container-base/src/main/docker/Dockerfile | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 256d9159d2d..fbd43604eac 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -93,8 +93,9 @@ RUN < Date: Tue, 16 Apr 2024 17:42:18 +0200 Subject: [PATCH 071/622] chore: remove obsolete Maven Cache seed workflow --- .github/workflows/maven_cache_seed.yaml | 44 ------------------------- 1 file changed, 44 deletions(-) delete mode 100644 .github/workflows/maven_cache_seed.yaml diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml deleted file mode 100644 index d31559138b8..00000000000 --- a/.github/workflows/maven_cache_seed.yaml +++ /dev/null @@ -1,44 +0,0 @@ -name: Maven Cache Seeding - -on: - push: - branches: - - develop - # According to https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy - # all caches are depleted after 7 days of no access. Make sure we rejuvenate every 7 days to keep it available. 
- schedule: - - cron: '23 2 * * 0' # Run for 'develop' every Sunday at 02:23 UTC - -env: - COMMON_CACHE_NAME: dataverse-maven-cache - -jobs: - preseed: - name: Drop and Re-Seed Local Repository - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Set up JDK - uses: actions/setup-java@v4 - with: - java-version: "17" - distribution: temurin - - name: Seed common cache - run: | - mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins - # This non-obvious order is due to the fact that the download via Maven will take a very long time. - # Jobs should not be left without a cache. Deleting and saving in one go leaves only a small chance for a cache miss. - - name: Drop common cache - continue-on-error: true # we don't care if the cache is not around - uses: prantlf/delete-cache-action@v2 - with: - key: ${{ env.COMMON_CACHE_NAME }} - # NOTE: It is vital here to remember that only caches with the scope of the default branch are - # available to other branches. We use the v2 action here to save it anyway. - - name: Save the cache - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ env.COMMON_CACHE_NAME }} - From 54fe365964f8c726ec2a3e42bee5b28b41f952d0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 16 Apr 2024 17:43:42 +0200 Subject: [PATCH 072/622] style(ci): remove superfluous empty line --- .github/workflows/container_base_push.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 667808d4d08..660f9bdd861 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -86,7 +86,6 @@ jobs: # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! - - name: Log in to the Container registry uses: docker/login-action@v3 with: From fdb5932a1a72e57b3660a123cdc386f8145e70d4 Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 18 Apr 2024 11:55:03 +0200 Subject: [PATCH 073/622] Use support email in the system email message 'closing' text --- src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 1eee9c65501..49b09cf98ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -117,10 +117,11 @@ public boolean sendSystemEmail(String to, String subject, String messageText, bo return false; } InternetAddress systemAddress = optionalAddress.get(); + InternetAddress supportAddress = getSupportAddress().orElse(systemAddress); String body = messageText + BundleUtil.getStringFromBundle(isHtmlContent ? "notification.email.closing.html" : "notification.email.closing", - List.of(BrandingUtil.getSupportTeamEmailAddress(systemAddress), BrandingUtil.getSupportTeamName(systemAddress))); + List.of(BrandingUtil.getSupportTeamEmailAddress(supportAddress), BrandingUtil.getSupportTeamName(supportAddress))); logger.fine(() -> "Sending email to %s. Subject: <<<%s>>>. 
Body: %s".formatted(to, subject, body)); try { From 2f61d699b6dd1f7d3b700d06d3436e44e399d0d5 Mon Sep 17 00:00:00 2001 From: paulboon Date: Tue, 30 Apr 2024 14:02:01 +0200 Subject: [PATCH 074/622] Added to the documentation and released notes for the use of the support email address in the closing text of the system emails --- .../10287-use-support-address-in-system-email-text.md | 4 ++++ doc/sphinx-guides/source/installation/config.rst | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 doc/release-notes/10287-use-support-address-in-system-email-text.md diff --git a/doc/release-notes/10287-use-support-address-in-system-email-text.md b/doc/release-notes/10287-use-support-address-in-system-email-text.md new file mode 100644 index 00000000000..7375fda68e2 --- /dev/null +++ b/doc/release-notes/10287-use-support-address-in-system-email-text.md @@ -0,0 +1,4 @@ +### Notification Email Improvement + +The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; 'contact us for support at', instead of the default system email address. +Using the system email address here was particularly problematic when it was a 'noreply' address. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 065277c06ee..b6555397f75 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2947,6 +2947,8 @@ If not set, the :ref:`systemEmail` is used for the feedback API/contact form ema Note that only the email address is required, which you can supply without the ``<`` and ``>`` signs, but if you include the text, it's the way to customize the name of your support team, which appears in the "from" address in emails as well as in help text in the UI. If you don't include the text, the installation name (see :ref:`Branding Your Installation`) will appear in the "from" address. +Also note that the support email address is used at the end of notification mails where it states; 'contact us for support at', followed by the support mail address if configured and the system email otherwise. + Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_MAIL_SUPPORT_EMAIL``. See also :ref:`smtp-config`. 
From 3c1ad632298b4c66f2583eef1cf33dd08791520c Mon Sep 17 00:00:00 2001
From: Steven Winship <39765413+stevenwinship@users.noreply.github.com>
Date: Tue, 7 May 2024 13:38:59 -0400
Subject: [PATCH 075/622] JSON dataset validation
---
 doc/sphinx-guides/source/api/native-api.rst | 13 +-
 scripts/search/tests/data/dataset-finch3.json | 102 ++++++
 .../iq/dataverse/DataverseServiceBean.java | 6 +-
 .../JsonSchemaConstraintException.java | 4 +
 .../validation/JSONDataValidation.java | 207 +++++++++++
 src/main/java/propertyFiles/Bundle.properties | 12 +
 .../harvard/iq/dataverse/api/DatasetsIT.java | 71 ++++
 .../validation/JSONDataValidationTest.java | 327 ++++++++++++++++++
 8 files changed, 738 insertions(+), 4 deletions(-)
 create mode 100644 scripts/search/tests/data/dataset-finch3.json
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java
 create mode 100644 src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index bcc37d6db1c..c30f551685c 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -566,8 +566,17 @@ While it is recommended to download a copy of the JSON Schema from the collectio
 Validate Dataset JSON File for a Collection
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting
-and the presence of required elements:
+Validates a dataset JSON file customized for a given collection prior to creating the dataset.
+The validation tests for:
+JSON formatting and the presence of required elements.
+typeClass must follow these rules:
+- if multiple = true, the value must be a list
+- if typeClass = ``primitive``, the value object is a String or a List of Strings depending on the multiple flag
+- if typeClass = ``compound``, the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag
+- if typeClass = ``controlledVocabulary``, the value(s) are checked against the list of known values
+typeName validations include:
+- dsDescription validation includes checks for typeName = ``dsDescriptionValue`` (required) and ``dsDescriptionDate`` (optional)
+- datasetContact validation includes checks for typeName = ``datasetContactName`` (required) and ``datasetContactEmail`` / ``datasetContactAffiliation`` (optional)

..
code-block:: bash diff --git a/scripts/search/tests/data/dataset-finch3.json b/scripts/search/tests/data/dataset-finch3.json new file mode 100644 index 00000000000..903b0aa124d --- /dev/null +++ b/scripts/search/tests/data/dataset-finch3.json @@ -0,0 +1,102 @@ +{ + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "HTML & More", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Markup, Marty", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "W4C", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value": "markup@mailinator.com" + }, + "datasetContactName": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Markup, Marty" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "BEGIN
<br></br>
END", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "2021-07-13" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "typeName": "language", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "English", + "Afar", + "aar" + ] + } + ], + "displayName": "Citation Metadata" + } + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 10b5d800c21..3d9ff19a617 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -22,7 +22,7 @@ import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.File; import java.io.IOException; import java.sql.Timestamp; @@ -34,6 +34,7 @@ import java.util.logging.Logger; import java.util.Properties; +import edu.harvard.iq.dataverse.validation.JSONDataValidation; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Inject; @@ -1023,9 +1024,10 @@ private String getCustomMDBSchema (MetadataBlock mdb, List req public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); - try { + try { Schema schema = SchemaLoader.load(rawSchema); schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid + JSONDataValidation.validate(schema, jsonInput); // throws a ValidationException if any objects are invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); String accumulatedexceptions = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java new file mode 100644 index 00000000000..110a4460313 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java @@ -0,0 +1,4 @@ +package edu.harvard.iq.dataverse.engine.command.exception; + +public class JsonSchemaConstraintException { +} diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java new file mode 100644 index 00000000000..99b0fdd9edc --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java @@ -0,0 +1,207 @@ +package edu.harvard.iq.dataverse.validation; + +import com.mashape.unirest.http.JsonNode; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.util.BundleUtil; +import jakarta.enterprise.inject.spi.CDI; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.json.JSONArray; + +import java.util.Collections; +import java.util.List; 
+import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class JSONDataValidation { + private static DatasetFieldServiceBean datasetFieldService = null; + private static Map> schemaDTOMap = new ConcurrentHashMap<>(); + + /** + * + * @param schema Schema file defining the JSON objects to be validated + * @param jsonInput JSON string to validate against the schema + * @throws ValidationException + */ + public static void validate(final Schema schema, String jsonInput) throws ValidationException { + if (datasetFieldService == null) { + datasetFieldService = CDI.current().select(DatasetFieldServiceBean.class).get(); + } + if (schemaDTOMap.isEmpty()) { + // TODO: load from a config file + schemaDTOMap.put("datasetContact", Collections.EMPTY_LIST); + schemaDTOMap.put("datasetContact.required", List.of("datasetContactName")); + schemaDTOMap.put("datasetContact.allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); + schemaDTOMap.put("dsDescription", Collections.EMPTY_LIST); + schemaDTOMap.put("dsDescription.required", List.of("dsDescriptionValue")); + schemaDTOMap.put("dsDescription.allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); + } + JsonNode node = new JsonNode(jsonInput); + if (node.isArray()) { + JSONArray arrayNode = node.getArray(); + validateObject(schema, "root", arrayNode.toList()); + } else { + node.getObject().toMap().forEach((k,v) -> { + validateObject(schema, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); + }); + } + } + + /* + * Validate objects recursively + */ + private static void validateObject(final Schema schema, String key, Object value) { + if (value instanceof Map) { + validateSchemaObject(schema, key, (Map) value); + + ((Map) value).entrySet().forEach(e -> { + validateObject(schema, (String) e.getKey(), e.getValue()); + }); + } else if (value instanceof List) { + ((List) value).listIterator().forEachRemaining(v -> { + validateObject(schema, key, v); + }); + } + } + + /* + * Validate objects specific to a type. Currently only validating Datasets + */ + private static void validateSchemaObject(final Schema schema, String key, Map valueMap) { + if (schema.definesProperty("datasetVersion")) { + validateDatasetObject(schema, key, valueMap); + } + } + + /* + * Specific validation for Dataset objects + */ + private static void validateDatasetObject(final Schema schema, String key, Map valueMap) { + if (valueMap != null && valueMap.containsKey("typeClass")) { + validateTypeClass(schema, key, valueMap, valueMap.get("value"), "dataset"); + } + } + + /* + * key: The name of the parent object + * valueMap: Map of all the metadata of the object + * value: The value field of the object + * messageType: Refers to the parent: if this is an object from a dataset the messageType would be 'dataset' + * This needs to match the Bundle.properties for mapping the error messages when an exception occurs + * + * Rules for typeClass: + * The contents of value depend on the field attributes + * if single/primitive, value is a String + * if multiple, value is a JsonArray + * multiple/primitive: each JsonArray element will contain String + * multiple/compound: each JsonArray element will contain Set of FieldDTOs + */ + private static void validateTypeClass(Schema schema, String key, Map valueMap, Object value, String messageType) { + + String typeClass = valueMap.containsKey("typeClass") ? valueMap.get("typeClass").toString() : ""; + String typeName = valueMap.containsKey("typeName") ? 
valueMap.get("typeName").toString() : ""; + boolean multiple = Boolean.valueOf(String.valueOf(valueMap.getOrDefault("multiple", "false"))); + + // make sure there is a value since 'value' is required + if (value == null) { + throwValidationException("value.missing", List.of(key, typeName)); + } + + if (multiple && !(value instanceof List)) { + throwValidationException("notlist.multiple", List.of(key, typeName, typeClass)); + } + if (!multiple && value instanceof List) { + throwValidationException("list.notmultiple", List.of(key, typeName)); + } + if ("primitive".equals(typeClass) && !multiple && !(value instanceof String)) { + throwValidationException("type", List.of(key, typeName, typeClass)); + } + if ("primitive".equals(typeClass) && multiple) { + ((List) value).listIterator().forEachRemaining(primitive -> { + if (!(primitive instanceof String)) { + throwValidationException("type", List.of(key, typeName, typeClass)); + } + }); + } + if ("compound".equals(typeClass)) { + if (multiple && value instanceof List) { + ((List) value).listIterator().forEachRemaining(item -> { + if (!(item instanceof Map)) { + throwValidationException("compound", List.of(key, typeName, typeClass)); + } else { + ((Map) item).forEach((k,val) -> { + if (!(val instanceof Map)) { + throwValidationException("compound", List.of(key, typeName, typeClass)); + } + // validate mismatch between compound object key and typeName in value + String valTypeName = ((Map) val).containsKey("typeName") ? (String)((Map) val).get("typeName") : ""; + if (!k.equals(valTypeName)) { + throwValidationException("compound.mismatch", List.of((String)k, valTypeName)); + } + validateChildObject(schema, (String)k, val, messageType + "." + typeName, + schemaDTOMap.getOrDefault(typeName+".required", Collections.EMPTY_LIST), schemaDTOMap.getOrDefault(typeName+".allowed", Collections.EMPTY_LIST)); + }); + } + }); + } + } + + if ("controlledVocabulary".equals(typeClass)) { + DatasetFieldType dsft = datasetFieldService.findByName(typeName); + if (value instanceof List) { + ((List) value).listIterator().forEachRemaining(cvv -> { + if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) cvv, true) == null) { + throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) cvv)); + } + }); + } else { + if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) value, true) == null) { + throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) value)); + } + } + } + } + + // If value is another object or list of objects that need to be further validated then childType refers to the parent + // Example: If this is a dsDescriptionValue from a dataset the messageType would be dataset.dsDescriptionValue + // This needs to match the Bundle.properties for mapping the error messages when an exception occurs + private static void validateChildObject(Schema schema, String key, Object child, String messageType, List requiredFields, List allowedFields) { + if (child instanceof Map) { + Map childMap = (Map) child; + + if (!childMap.containsKey("value")) { // if child is simple key/value where the value Map is what we really want to validate + requiredFields.forEach(field -> { + if (!childMap.containsKey(field)) { + throwValidationException(messageType, "required.missing", List.of(key, field)); + } + }); + childMap.forEach((k, v) -> { + if (!allowedFields.isEmpty() && !allowedFields.contains(k)) { + throwValidationException(messageType, 
"invalidType", List.of(key, (String) k)); + } + }); + childMap.forEach((k,v) -> { + Map valueMap = (v instanceof Map) ? (Map) v : null; + if (valueMap == null || !k.equals(valueMap.get("typeName"))) { + throwValidationException(messageType, "invalidType", List.of(key, (String) k)); + } + validateChildObject(schema, (String)k, v, messageType, requiredFields, allowedFields); + }); + } else { // this child is an object with a "value" and "typeName" attribute + String typeName = childMap.containsKey("typeName") ? childMap.get("typeName").toString() : ""; + validateTypeClass(schema, typeName, childMap, childMap.get("value"), messageType); + } + } + } + private static void throwValidationException(String key, List argList) { + throw new ValidationException(BundleUtil.getStringFromBundle("schema.validation.exception." + key, argList)); + } + private static void throwValidationException(String type, String message, List argList) { + if (type != null) { + throwValidationException(type + "." + message, argList); + } else { + throwValidationException(message, argList); + } + } +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 02d848df1e3..9a82d7569e5 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -3007,3 +3007,15 @@ publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persiste api.errors.invalidApiToken=Invalid API token. api.ldninbox.citation.alert={0},

The {1} has just been notified that the {2}, {3}, cites "{6}" in this repository. api.ldninbox.citation.subject={0}: A Dataset Citation has been reported! + +#Schema Validation +schema.validation.exception.value.missing=Invalid data for key:{0} typeName:{1}. 'value' missing. +schema.validation.exception.list.notmultiple=Invalid data for key:{0} typeName:{1}. Found value as list but ''multiple'' is set to false. +schema.validation.exception.notlist.multiple=Invalid data for key:{0} typeName:{1}. Fields with ''multiple'' set to true must be a list. +schema.validation.exception.compound=Compound data type must be accompanied by a value that is either an object (multiple=false) or a list of objects (multiple=true) +schema.validation.exception.compound.mismatch=Compound value {0} must match typeName of the object. Found {1} +schema.validation.exception.dataset.cvv.missing=Controlled vocabulary for key:{0} typeName:{1} value:''{2}'' is not valid. +schema.validation.exception.dataset.dsDescription.required.missing=Invalid data for key:{0} typeName:{1}. dsDescriptionValue is required if field type is dsDescription. +schema.validation.exception.dataset.dsDescription.invalidType=Invalid data for key:{0} typeName:{1}. Only dsDescriptionValue and dsDescriptionDate allowed. +schema.validation.exception.dataset.datasetContact.required.missing=Invalid data for key:{0} typeName:{1}. datasetContactName is required if field type is datasetContact. +schema.validation.exception.dataset.datasetContact.invalidType=Invalid data for key:{0} typeName:{1}. Only datasetContactName, datasetContactEmail and datasetContactAffiliation allowed. \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 5b603d88c6d..5d0bb6e2fad 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -182,6 +182,77 @@ public void testCollectionSchema(){ } + @Test + public void testDatasetSchemaValidation() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken); + getCollectionSchemaResponse.prettyPrint(); + getCollectionSchemaResponse.then().assertThat() + .statusCode(200); + + JsonObject expectedSchema = null; + try { + expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + } catch (IOException ex) { + } + + assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); + + // add a language that is not in the Controlled vocabulary + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"aar\"", + "\"aar\",\"badlang\"", + BundleUtil.getStringFromBundle("schema.validation.exception.dataset.cvv.missing", List.of("fields", "language", "badlang")) + ); + + // change multiple to true on value that is a not a List + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "multiple\": false,\n" + + " \"typeName\": \"title", + "multiple\": true,\n" + + " \"typeName\": \"title", + 
BundleUtil.getStringFromBundle("schema.validation.exception.notlist.multiple", List.of("fields", "title")) + ); + + // change multiple to false on value that is a List + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "typeName\": \"language\",\n" + + " \"multiple\": true", + "typeName\": \"language\",\n" + + " \"multiple\": false", + BundleUtil.getStringFromBundle("schema.validation.exception.list.notmultiple", List.of("fields", "language")) + ); + + // add a mismatched typeName + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"typeName\": \"datasetContactName\",", + "\"typeName\": \"datasetContactNme\",", + BundleUtil.getStringFromBundle("schema.validation.exception.compound.mismatch", List.of("datasetContactName", "datasetContactNme")) + ); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverseResponse.prettyPrint(); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + } + private void testDatasetSchemaValidationHelper(String dataverseAlias, String apiToken, String origString, String replacementString, String expectedError) { + String json = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch3.json"); + json = json.replace(origString, replacementString); + Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, json, apiToken); + validateDatasetJsonResponse.prettyPrint(); + validateDatasetJsonResponse.then().assertThat() + .statusCode(200) + .body(containsString(expectedError)); + } + @Test public void testCreateDataset() { diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java new file mode 100644 index 00000000000..25bdc9fe3af --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -0,0 +1,327 @@ +package edu.harvard.iq.dataverse.validation; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import java.lang.reflect.Field; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; + +public class JSONDataValidationTest { + + @Mock + static DatasetFieldServiceBean datasetFieldServiceMock; + @Mock + static DatasetFieldType datasetFieldTypeMock; + static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + static String rawSchema() { + return """ + { + "$schema": "http://json-schema.org/draft-04/schema#", + "$defs": { + "field": { + "type": "object", + "required": ["typeClass", "multiple", "typeName"], + "properties": { + "value": { + "anyOf": [ + { + "type": "array" + }, + { + "type": "string" + }, + { + "$ref": "#/$defs/field" + } + ] + }, + "typeClass": { + "type": "string" + }, + "multiple": { + "type": "boolean" + }, + "typeName": { + "type": "string" + } + } + } + }, + "type": "object", + "properties": { + "datasetVersion": { + "type": "object", + "properties": { + "license": { + "type": "object", + "properties": { + "name": { + "type": 
"string" + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["name", "uri"] + }, + "metadataBlocks": { + "type": "object", + "properties": { + "citation": { + "type": "object", + "properties": { + "fields": { + "type": "array", + "items": { + "$ref": "#/$defs/field" + }, + "minItems": 5, + "allOf": [ + { + "contains": { + "properties": { + "typeName": { + "const": "title" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "author" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "datasetContact" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "dsDescription" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "subject" + } + } + } + } + ] + } + }, + "required": ["fields"] + } + }, + "required": ["citation"] + } + }, + "required": ["metadataBlocks"] + } + }, + "required": ["datasetVersion"] + } + """; + } + static String jsonInput() { + return """ + { + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { "datasetContactEmail" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value" : "finch@mailinator.com" + }, + "datasetContactName" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Finch, Fiona" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [{ + "dsDescriptionValue":{ + "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "2021-07-13" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": { + "dsDescriptionValue":{ + "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }}, + "typeClass": "compound", + "multiple": false, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences", + "Social Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + } + ], + "displayName": "Citation Metadata" + } + } + } + } + """; + } + + @BeforeAll + static void setup() throws NoSuchFieldException, IllegalAccessException { + datasetFieldServiceMock = Mockito.mock(DatasetFieldServiceBean.class); + datasetFieldTypeMock = Mockito.mock(DatasetFieldType.class); + Field datasetFieldServiceField = JSONDataValidation.class.getDeclaredField("datasetFieldService"); + datasetFieldServiceField.setAccessible(true); + 
datasetFieldServiceField.set(JSONDataValidation.class, datasetFieldServiceMock); + + Mockito.when(datasetFieldServiceMock.findByName(any(String.class))).thenReturn(datasetFieldTypeMock); + List cvvList = List.of("Medicine, Health and Life Sciences", "Social Sciences"); + cvvList.forEach(i -> { + Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, i,true)).thenReturn(cvv); + }); + Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, "Bad",true)).thenReturn(null); + } + @Test + public void testGoodJson() { + Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); + JSONDataValidation.validate(schema, jsonInput()); + } + @Test + public void testBadJson() { + Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); + try { + JSONDataValidation.validate(schema, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + System.out.println(e.getStackTrace()); + } + + try { + // test multiple = false but value is list + JSONDataValidation.validate(schema, jsonInput().replaceAll("true", "false")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + // verify that child objects are also validated + String childTest = "\"multiple\": false, \"typeName\": \"authorAffiliation\""; + try { + String trimmedStr = jsonInput().replaceAll("\\s{2,}", " "); + // test child object with multiple set to true + JSONDataValidation.validate(schema, trimmedStr.replace(childTest, childTest.replace("false", "true"))); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dsDescription but dsDescriptionValue missing + JSONDataValidation.validate(schema, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dsDescription but child dsDescriptionValue missing + JSONDataValidation.validate(schema, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + } +} From 1d394ea901df7cadb7277e47e51eb25716461a2f Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Wed, 8 May 2024 15:29:32 +0200 Subject: [PATCH 076/622] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index b6555397f75..034e91187cc 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2947,7 +2947,7 @@ If not set, the :ref:`systemEmail` is used for the feedback API/contact form ema Note that only the email address is required, which you can supply without the ``<`` and ``>`` signs, but if you include the text, it's the way to customize the name of your support team, which appears in the "from" address in emails as well as in help text in the UI. If you don't include the text, the installation name (see :ref:`Branding Your Installation`) will appear in the "from" address. 
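(Editorial aside: the snippet below is a minimal sketch of the fallback order described here, not the actual mail-sending code; the helper class and method names are illustrative. It assumes the ``dataverse.mail.support-email`` setting is read through the MicroProfile Config API mentioned below.)

    import java.util.Optional;
    import org.eclipse.microprofile.config.ConfigProvider;

    // Hypothetical helper: use the support address if configured, else fall back to the system email.
    class SupportAddressSketch {
        static String resolveSupportAddress(String systemEmail) {
            Optional<String> support = ConfigProvider.getConfig()
                    .getOptionalValue("dataverse.mail.support-email", String.class);
            return support.orElse(systemEmail);
        }
    }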
-Also note that the support email address is used at the end of notification mails where it states; 'contact us for support at', followed by the support mail address if configured and the system email otherwise.
+Also note that the support email address is used at the end of notification mails where it states: "contact us for support at", followed by the support mail address if configured and the system email otherwise.
 
 Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_MAIL_SUPPORT_EMAIL``.
 

From ca4202f4d85857368d29cf16a2e2ca4d4f7d6933 Mon Sep 17 00:00:00 2001
From: Paul Boon
Date: Wed, 8 May 2024 15:29:52 +0200
Subject: [PATCH 077/622] Update
 doc/release-notes/10287-use-support-address-in-system-email-text.md

Co-authored-by: Philip Durbin
---
 .../10287-use-support-address-in-system-email-text.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/release-notes/10287-use-support-address-in-system-email-text.md b/doc/release-notes/10287-use-support-address-in-system-email-text.md
index 7375fda68e2..4c294404298 100644
--- a/doc/release-notes/10287-use-support-address-in-system-email-text.md
+++ b/doc/release-notes/10287-use-support-address-in-system-email-text.md
@@ -1,4 +1,4 @@
 ### Notification Email Improvement
 
-The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; 'contact us for support at', instead of the default system email address.
+The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states: "contact us for support at", instead of the default system email address.
 
 Using the system email address here was particularly problematic when it was a 'noreply' address.
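A minimal sketch (editorial, not part of the patch) of how the two validation passes are wired together in `DataverseServiceBean.isDatasetJsonValid` after the rework in the next commit. The wrapper class and method here are illustrative; the calls inside are taken from the diff below, and the schemaChildMap is assumed to have been filled with each compound field's required/allowed child typeNames while the collection schema was generated:

    import edu.harvard.iq.dataverse.validation.JSONDataValidation;
    import org.everit.json.schema.Schema;
    import org.everit.json.schema.loader.SchemaLoader;
    import org.json.JSONObject;
    import org.json.JSONTokener;

    import java.util.List;
    import java.util.Map;

    class ValidationFlowSketch {
        // rawCollectionSchema: the JSON schema string returned by getCollectionDatasetSchema(alias, schemaChildMap)
        static void validate(String rawCollectionSchema, String jsonInput,
                             Map<String, Map<String, List<String>>> schemaChildMap) {
            Schema schema = SchemaLoader.load(new JSONObject(new JSONTokener(rawCollectionSchema)));
            // structural pass: everit-json-schema throws ValidationException on shape problems
            schema.validate(new JSONObject(jsonInput));
            // semantic pass: Dataverse-specific rules (typeClass, multiple, child fields, vocabularies)
            JSONDataValidation.validate(schema, schemaChildMap, jsonInput);
        }
    }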
From 33d6b56777f445e51b619ef91fa6186c0aa38fd6 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Thu, 9 May 2024 15:40:54 -0400 Subject: [PATCH 078/622] rework --- .../iq/dataverse/DataverseServiceBean.java | 23 ++++- .../validation/JSONDataValidation.java | 93 ++++++++----------- src/main/java/propertyFiles/Bundle.properties | 6 +- .../harvard/iq/dataverse/api/DatasetsIT.java | 13 +++ .../validation/JSONDataValidationTest.java | 61 ++++++++---- 5 files changed, 114 insertions(+), 82 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 3d9ff19a617..00774bbd3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -889,14 +889,16 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } - public String getCollectionDatasetSchema(String dataverseAlias) { + return getCollectionDatasetSchema(dataverseAlias, null); + } + public String getCollectionDatasetSchema(String dataverseAlias, Map>> schemaChildMap) { Dataverse testDV = this.findByAlias(dataverseAlias); while (!testDV.isMetadataBlockRoot()) { if (testDV.getOwner() == null) { - break; // we are at the root; which by defintion is metadata blcok root, regarldess of the value + break; // we are at the root; which by definition is metadata block root, regardless of the value } testDV = testDV.getOwner(); } @@ -933,6 +935,8 @@ public String getCollectionDatasetSchema(String dataverseAlias) { dsft.setRequiredDV(dsft.isRequired()); dsft.setInclude(true); } + List childrenRequired = new ArrayList<>(); + List childrenAllowed = new ArrayList<>(); if (dsft.isHasChildren()) { for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); @@ -945,8 +949,18 @@ public String getCollectionDatasetSchema(String dataverseAlias) { child.setRequiredDV(child.isRequired() && dsft.isRequired()); child.setInclude(true); } + if (child.isRequired()) { + childrenRequired.add(child.getName()); + } + childrenAllowed.add(child.getName()); } } + if (schemaChildMap != null) { + Map> map = new HashMap<>(); + map.put("required", childrenRequired); + map.put("allowed", childrenAllowed); + schemaChildMap.put(dsft.getName(), map); + } if(dsft.isRequiredDV()){ requiredDSFT.add(dsft); } @@ -1022,12 +1036,13 @@ private String getCustomMDBSchema (MetadataBlock mdb, List req } public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { - JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); + Map>> schemaChildMap = new HashMap<>(); + JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias, schemaChildMap))); try { Schema schema = SchemaLoader.load(rawSchema); schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid - JSONDataValidation.validate(schema, jsonInput); // throws a ValidationException if any objects are invalid + JSONDataValidation.validate(schema, schemaChildMap, jsonInput); // throws a ValidationException if any objects are invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + 
vx.getErrorMessage()); String accumulatedexceptions = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java index 99b0fdd9edc..fb19a14e7de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java +++ b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java @@ -9,14 +9,13 @@ import org.everit.json.schema.ValidationException; import org.json.JSONArray; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.*; +import java.util.logging.Logger; +import java.util.stream.Collectors; public class JSONDataValidation { + private static final Logger logger = Logger.getLogger(JSONDataValidation.class.getCanonicalName()); private static DatasetFieldServiceBean datasetFieldService = null; - private static Map> schemaDTOMap = new ConcurrentHashMap<>(); /** * @@ -24,26 +23,17 @@ public class JSONDataValidation { * @param jsonInput JSON string to validate against the schema * @throws ValidationException */ - public static void validate(final Schema schema, String jsonInput) throws ValidationException { + public static void validate(Schema schema, Map>> schemaChildMap, String jsonInput) throws ValidationException { if (datasetFieldService == null) { datasetFieldService = CDI.current().select(DatasetFieldServiceBean.class).get(); } - if (schemaDTOMap.isEmpty()) { - // TODO: load from a config file - schemaDTOMap.put("datasetContact", Collections.EMPTY_LIST); - schemaDTOMap.put("datasetContact.required", List.of("datasetContactName")); - schemaDTOMap.put("datasetContact.allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); - schemaDTOMap.put("dsDescription", Collections.EMPTY_LIST); - schemaDTOMap.put("dsDescription.required", List.of("dsDescriptionValue")); - schemaDTOMap.put("dsDescription.allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); - } JsonNode node = new JsonNode(jsonInput); if (node.isArray()) { JSONArray arrayNode = node.getArray(); - validateObject(schema, "root", arrayNode.toList()); + validateObject(schema, schemaChildMap, "root", arrayNode.toList()); } else { node.getObject().toMap().forEach((k,v) -> { - validateObject(schema, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); + validateObject(schema, schemaChildMap, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); }); } } @@ -51,16 +41,16 @@ public static void validate(final Schema schema, String jsonInput) throws Valida /* * Validate objects recursively */ - private static void validateObject(final Schema schema, String key, Object value) { + private static void validateObject(Schema schema, Map>> schemaChildMap, String key, Object value) { if (value instanceof Map) { - validateSchemaObject(schema, key, (Map) value); + validateSchemaObject(schema, schemaChildMap, key, (Map) value); ((Map) value).entrySet().forEach(e -> { - validateObject(schema, (String) e.getKey(), e.getValue()); + validateObject(schema, schemaChildMap, (String) e.getKey(), e.getValue()); }); } else if (value instanceof List) { ((List) value).listIterator().forEachRemaining(v -> { - validateObject(schema, key, v); + validateObject(schema, schemaChildMap, key, v); }); } } @@ -68,18 +58,18 @@ private static void validateObject(final Schema schema, String key, Object value /* * Validate objects specific to a type. 
Currently only validating Datasets */ - private static void validateSchemaObject(final Schema schema, String key, Map valueMap) { + private static void validateSchemaObject(Schema schema, Map>> schemaChildMap, String key, Map valueMap) { if (schema.definesProperty("datasetVersion")) { - validateDatasetObject(schema, key, valueMap); + validateDatasetObject(schema, schemaChildMap, key, valueMap); } } /* * Specific validation for Dataset objects */ - private static void validateDatasetObject(final Schema schema, String key, Map valueMap) { + private static void validateDatasetObject(Schema schema, Map>> schemaChildMap, String key, Map valueMap) { if (valueMap != null && valueMap.containsKey("typeClass")) { - validateTypeClass(schema, key, valueMap, valueMap.get("value"), "dataset"); + validateTypeClass(schema, schemaChildMap, key, valueMap, valueMap.get("value"), "dataset"); } } @@ -97,7 +87,7 @@ private static void validateDatasetObject(final Schema schema, String key, Map v * multiple/primitive: each JsonArray element will contain String * multiple/compound: each JsonArray element will contain Set of FieldDTOs */ - private static void validateTypeClass(Schema schema, String key, Map valueMap, Object value, String messageType) { + private static void validateTypeClass(Schema schema, Map>> schemaChildMap, String key, Map valueMap, Object value, String messageType) { String typeClass = valueMap.containsKey("typeClass") ? valueMap.get("typeClass").toString() : ""; String typeName = valueMap.containsKey("typeName") ? valueMap.get("typeName").toString() : ""; @@ -135,13 +125,12 @@ private static void validateTypeClass(Schema schema, String key, Map valueMap, O throwValidationException("compound", List.of(key, typeName, typeClass)); } // validate mismatch between compound object key and typeName in value - String valTypeName = ((Map) val).containsKey("typeName") ? (String)((Map) val).get("typeName") : ""; + String valTypeName = ((Map) val).containsKey("typeName") ? (String) ((Map) val).get("typeName") : ""; if (!k.equals(valTypeName)) { - throwValidationException("compound.mismatch", List.of((String)k, valTypeName)); + throwValidationException("compound.mismatch", List.of((String) k, valTypeName)); } - validateChildObject(schema, (String)k, val, messageType + "." 
+ typeName, - schemaDTOMap.getOrDefault(typeName+".required", Collections.EMPTY_LIST), schemaDTOMap.getOrDefault(typeName+".allowed", Collections.EMPTY_LIST)); }); + validateChildren(schema, schemaChildMap, key, ((Map) item).values(), typeName, messageType); } }); } @@ -166,32 +155,26 @@ private static void validateTypeClass(Schema schema, String key, Map valueMap, O // If value is another object or list of objects that need to be further validated then childType refers to the parent // Example: If this is a dsDescriptionValue from a dataset the messageType would be dataset.dsDescriptionValue // This needs to match the Bundle.properties for mapping the error messages when an exception occurs - private static void validateChildObject(Schema schema, String key, Object child, String messageType, List requiredFields, List allowedFields) { - if (child instanceof Map) { - Map childMap = (Map) child; - - if (!childMap.containsKey("value")) { // if child is simple key/value where the value Map is what we really want to validate - requiredFields.forEach(field -> { - if (!childMap.containsKey(field)) { - throwValidationException(messageType, "required.missing", List.of(key, field)); - } - }); - childMap.forEach((k, v) -> { - if (!allowedFields.isEmpty() && !allowedFields.contains(k)) { - throwValidationException(messageType, "invalidType", List.of(key, (String) k)); - } - }); - childMap.forEach((k,v) -> { - Map valueMap = (v instanceof Map) ? (Map) v : null; - if (valueMap == null || !k.equals(valueMap.get("typeName"))) { - throwValidationException(messageType, "invalidType", List.of(key, (String) k)); - } - validateChildObject(schema, (String)k, v, messageType, requiredFields, allowedFields); - }); - } else { // this child is an object with a "value" and "typeName" attribute - String typeName = childMap.containsKey("typeName") ? childMap.get("typeName").toString() : ""; - validateTypeClass(schema, typeName, childMap, childMap.get("value"), messageType); + private static void validateChildren(Schema schema, Map>> schemaChildMap, String key, Collection children, String typeName, String messageType) { + if (children == null || children.isEmpty()) { + return; + } + List requiredFields = new ArrayList<>(); + requiredFields.addAll((List)schemaChildMap.getOrDefault(typeName, Collections.EMPTY_MAP).getOrDefault("required", Collections.EMPTY_LIST)); + List allowedFields = (List)schemaChildMap.getOrDefault(typeName, Collections.EMPTY_MAP).getOrDefault("allowed", Collections.EMPTY_LIST); + children.forEach(child -> { + if (child instanceof Map) { + String childTypeName = ((Map) child).containsKey("typeName") ? 
(String)((Map) child).get("typeName") : ""; + if (!allowedFields.isEmpty() && !allowedFields.contains(childTypeName)) { + throwValidationException(messageType, "invalidType", List.of(typeName, childTypeName, allowedFields.stream().collect(Collectors.joining(", ")))); + } + if (!requiredFields.isEmpty() && requiredFields.contains(childTypeName)) { + requiredFields.remove(childTypeName); + } } + }); + if (!requiredFields.isEmpty()) { + throwValidationException(messageType, "required.missing", List.of(typeName, requiredFields.stream().collect(Collectors.joining(", ")), typeName)); } } private static void throwValidationException(String key, List argList) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 9a82d7569e5..6355e71cb36 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -3015,7 +3015,5 @@ schema.validation.exception.notlist.multiple=Invalid data for key:{0} typeName:{ schema.validation.exception.compound=Compound data type must be accompanied by a value that is either an object (multiple=false) or a list of objects (multiple=true) schema.validation.exception.compound.mismatch=Compound value {0} must match typeName of the object. Found {1} schema.validation.exception.dataset.cvv.missing=Controlled vocabulary for key:{0} typeName:{1} value:''{2}'' is not valid. -schema.validation.exception.dataset.dsDescription.required.missing=Invalid data for key:{0} typeName:{1}. dsDescriptionValue is required if field type is dsDescription. -schema.validation.exception.dataset.dsDescription.invalidType=Invalid data for key:{0} typeName:{1}. Only dsDescriptionValue and dsDescriptionDate allowed. -schema.validation.exception.dataset.datasetContact.required.missing=Invalid data for key:{0} typeName:{1}. datasetContactName is required if field type is datasetContact. -schema.validation.exception.dataset.datasetContact.invalidType=Invalid data for key:{0} typeName:{1}. Only datasetContactName, datasetContactEmail and datasetContactAffiliation allowed. \ No newline at end of file +schema.validation.exception.dataset.invalidType=Invalid data for key:{0} typeName:{1}. Only {2} allowed. +schema.validation.exception.dataset.required.missing=Invalid data for key:{0}. {1} is(are) required if field type is {2}. 
\ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 5d0bb6e2fad..1506fd8be90 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -239,6 +239,19 @@ public void testDatasetSchemaValidation() { BundleUtil.getStringFromBundle("schema.validation.exception.compound.mismatch", List.of("datasetContactName", "datasetContactNme")) ); + // add a typeName which is not allowed + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"datasetContactEmail\": {\n" + + " \"typeClass\": \"primitive\",\n" + + " \"multiple\": false,\n" + + " \"typeName\": \"datasetContactEmail\",", + "\"datasetContactNotAllowed\": {\n" + + " \"typeClass\": \"primitive\",\n" + + " \"multiple\": false,\n" + + " \"typeName\": \"datasetContactNotAllowed\",", + BundleUtil.getStringFromBundle("schema.validation.exception.dataset.invalidType", List.of("datasetContact", "datasetContactNotAllowed", "datasetContactName, datasetContactAffiliation, datasetContactEmail")) + ); + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); deleteDataverseResponse.prettyPrint(); assertEquals(200, deleteDataverseResponse.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java index 25bdc9fe3af..4e60d013f87 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -3,18 +3,20 @@ import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.util.json.JsonUtil; import org.everit.json.schema.Schema; import org.everit.json.schema.ValidationException; import org.everit.json.schema.loader.SchemaLoader; import org.json.JSONObject; +import org.json.JSONTokener; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.mockito.Mockito; import java.lang.reflect.Field; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; @@ -26,6 +28,7 @@ public class JSONDataValidationTest { @Mock static DatasetFieldType datasetFieldTypeMock; static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + static Map>> schemaChildMap = new HashMap<>(); static String rawSchema() { return """ { @@ -228,18 +231,6 @@ static String jsonInput() { "multiple": true, "typeName": "dsDescription" }, - { - "value": { - "dsDescriptionValue":{ - "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", - "multiple": false, - "typeClass": "primitive", - "typeName": "dsDescriptionValue" - }}, - "typeClass": "compound", - "multiple": false, - "typeName": "dsDescription" - }, { "value": [ "Medicine, Health and Life Sciences", @@ -272,17 +263,33 @@ static void setup() throws NoSuchFieldException, IllegalAccessException { Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, i,true)).thenReturn(cvv); }); 
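        // The stubs above make every known vocabulary term resolve to a ControlledVocabularyValue;
        // the stub below maps "Bad" to null so the tests can exercise the cvv.missing path.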
Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, "Bad",true)).thenReturn(null); + + Map> datasetContact = new HashMap<>(); + datasetContact.put("required", List.of("datasetContactName")); + datasetContact.put("allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); + schemaChildMap.put("datasetContact",datasetContact); + Map> dsDescription = new HashMap<>(); + dsDescription.put("required", List.of("dsDescriptionValue")); + dsDescription.put("allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); + schemaChildMap.put("dsDescription",dsDescription); + + } + @Test + public void testSchema() { + JSONObject rawSchema = new JSONObject(new JSONTokener(rawSchema())); + Schema schema = SchemaLoader.load(rawSchema); + schema.validate(new JSONObject(jsonInput())); } @Test public void testGoodJson() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); - JSONDataValidation.validate(schema, jsonInput()); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput()); } @Test public void testBadJson() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); try { - JSONDataValidation.validate(schema, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -291,7 +298,7 @@ public void testBadJson() { try { // test multiple = false but value is list - JSONDataValidation.validate(schema, jsonInput().replaceAll("true", "false")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("true", "false")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -302,7 +309,7 @@ public void testBadJson() { try { String trimmedStr = jsonInput().replaceAll("\\s{2,}", " "); // test child object with multiple set to true - JSONDataValidation.validate(schema, trimmedStr.replace(childTest, childTest.replace("false", "true"))); + JSONDataValidation.validate(schema, schemaChildMap, trimmedStr.replace(childTest, childTest.replace("false", "true"))); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -310,7 +317,7 @@ public void testBadJson() { try { // test dsDescription but dsDescriptionValue missing - JSONDataValidation.validate(schema, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -318,7 +325,23 @@ public void testBadJson() { try { // test dsDescription but child dsDescriptionValue missing - JSONDataValidation.validate(schema, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test required dataType missing + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("\"datasetContactName\"", "\"datasetContactAffiliation\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // 
test dataType not allowed
+            JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("\"datasetContactEmail\"", "\"datasetContactNotAllowed\""));
            fail();
        } catch (ValidationException e) {
            System.out.println(e.getMessage());

From ba8b01b44b18d776b636191cf3ad02a1ccace787 Mon Sep 17 00:00:00 2001
From: Steven Winship <39765413+stevenwinship@users.noreply.github.com>
Date: Fri, 10 May 2024 10:04:51 -0400
Subject: [PATCH 079/622] doc changes

---
 doc/release-notes/10169-JSON-schema-validation.md | 4 ++++
 doc/sphinx-guides/source/api/native-api.rst | 5 +++--
 .../iq/dataverse/validation/JSONDataValidationTest.java | 4 ++--
 3 files changed, 9 insertions(+), 4 deletions(-)
 create mode 100644 doc/release-notes/10169-JSON-schema-validation.md

diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md
new file mode 100644
index 00000000000..ae47f6a1401
--- /dev/null
+++ b/doc/release-notes/10169-JSON-schema-validation.md
@@ -0,0 +1,4 @@
+### JSON Schema for datasets

+Enhanced JSON schema validation now checks for required and allowed child objects and type-checks the ''primitive'', ''compound'', and ''controlledVocabulary'' field types. Error messages are more user-friendly, to help pinpoint the issues in the dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169.
+

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index c30f551685c..c7447deb22a 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -573,10 +573,11 @@ typeClass must follow these rules:

 - if multiple = true then value must be a list
 - if typeClass = ''primitive'' the value object is a String or a List of Strings depending on the multiple flag
 - if typeClass = ''compound'' the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag
-- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values
-typeName validations include:
+- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values stored in the database
+typeName validations follow the database schema (each compound field's required and allowed child typeNames are configured automatically). Examples include:
 - dsDescription validation includes checks for typeName = ''dsDescriptionValue'' (required) and ''dsDescriptionDate'' (optional)
 - datasetContact validation includes checks for typeName = ''datasetContactName'' (required) and ''datasetContactEmail''; ''datasetContactAffiliation'' (optional)
+- etc.

 ..
code-block:: bash diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java index 4e60d013f87..e88dc9f4bd9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -281,12 +281,12 @@ public void testSchema() { schema.validate(new JSONObject(jsonInput())); } @Test - public void testGoodJson() { + public void testValid() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); JSONDataValidation.validate(schema, schemaChildMap, jsonInput()); } @Test - public void testBadJson() { + public void testInvalid() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); try { JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); From fe297bad1e90c2c6d5071589acbea4732178bebb Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 10 May 2024 10:07:43 -0400 Subject: [PATCH 080/622] remove unused class --- .../command/exception/JsonSchemaConstraintException.java | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java deleted file mode 100644 index 110a4460313..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java +++ /dev/null @@ -1,4 +0,0 @@ -package edu.harvard.iq.dataverse.engine.command.exception; - -public class JsonSchemaConstraintException { -} From 4878cfe47a284f029b2d98adb64d02dafdb540b6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:31:06 -0400 Subject: [PATCH 081/622] separate metadata parsing/params from XML generation code --- .../pidproviders/doi/AbstractDOIProvider.java | 25 ++-- .../pidproviders/doi/DoiMetadata.java | 138 ++++++++++++++++++ .../datacite/DOIDataCiteRegisterService.java | 57 ++++---- 3 files changed, 180 insertions(+), 40 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 43e34e74c59..02a7dedce47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -91,31 +91,30 @@ public String getMetadataFromDvObject(String identifier, Map met } else { dataset = (Dataset) dvObject.getOwner(); } - - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + 
doiMetadata.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + doiMetadata.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; String fileDescription = df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + doiMetadata.setDescription(fileDescription == null ? "" : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setTitle(dvObject.getCurrentName()); String producerString = pidProviderService.getProducer(); if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { producerString = UNAVAILABLE; } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java new file mode 100644 index 00000000000..ffd24747bc2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java @@ -0,0 +1,138 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.ArrayList; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DatasetAuthor; + + +//Parses some specific parts of a DataCite XML metadata file +public class DoiMetadata { + + private String identifier; + private List creators; + private String title; + private String publisher; + private String publisherYear; + private List datafileIdentifiers; + private List authors; + private String description; + private List contacts; + private List producers; + + + public DoiMetadata() { + } + + public void parseDataCiteXML(String xmlMetaData) { + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + 
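+    // Plain accessors follow. parseDataCiteXML() above fills identifier, creators, title,
+    // publisher, and publisherYear from DataCite XML; the PID providers set the remaining
+    // fields (authors, description, contacts, producers) directly.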
public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java index 0e322eace05..bc69275ac1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; /** @@ -90,28 +91,28 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractPidProvider.UNAVAILABLE; } - metadataTemplate.setDescription(description); + doiMetadata.setDescription(description); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. //This could/should be removed if the datafile methods add escaping String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); + doiMetadata.setDescription(fileDescription == null ? 
AbstractPidProvider.UNAVAILABLE : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); String title = dvObject.getCurrentName(); if(dvObject.isInstanceofDataFile()) { //Note file title is not currently escaped the way the dataset title is, so adding it here. @@ -122,40 +123,41 @@ public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + DoiMetadata doiMetadata = new DoiMetadata(); + + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + doiMetadata.setDescription(AbstractPidProvider.UNAVAILABLE); String title =metadata.get("datacite.title"); System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - metadataTemplate.setAuthors(null); + doiMetadata.setAuthors(null); - metadataTemplate.setTitle(title); + doiMetadata.setTitle(title); String producerString = AbstractPidProvider.UNAVAILABLE; - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } @@ -209,11 +211,12 @@ Map getMetadata(String identifier) throws IOException { Map metadata = new HashMap<>(); try { String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", String.join("; ", template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.parseDataCiteXML(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", doiMetadata.getCreators())); + metadata.put("datacite.title", doiMetadata.getTitle()); + metadata.put("datacite.publisher", doiMetadata.getPublisher()); + metadata.put("datacite.publicationyear", doiMetadata.getPublisherYear()); } catch (RuntimeException e) { logger.log(Level.INFO, identifier, e); } From 68792c2f92c90f716f39caaa5f76b652592186c0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:31:37 -0400 Subject: [PATCH 082/622] extract some common xml writing util code --- .../dataverse/export/ddi/DdiExportUtil.java | 486 ++++++------------ .../iq/dataverse/util/xml/XmlWriterUtil.java | 174 +++++++ 2 files changed, 340 insertions(+), 320 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java diff --git 
a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 9a689f7a4ed..0c861cb6c09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -24,6 +24,8 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -111,9 +113,9 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles()); @@ -133,9 +135,9 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createFileDscr(xmlw, fileDetails); @@ -186,15 +188,15 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "subTitl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.subTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); } xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(pid); @@ -218,23 +220,23 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) boolean excludeRepository = settingsService.isTrueForKey(SettingsServiceBean.Key.ExportInstallationAsDistributorOnlyWhenNotSet, false); if 
(!StringUtils.isEmpty(datasetDto.getPublisher()) && !(excludeRepository && distributorSet)) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); //distrbtr } writeDistributorsElement(xmlw, version, datasetDto.getMetadataLanguage()); writeContactsElement(xmlw, version); /* per SCHEMA, depositr comes before depDate! - L.A. */ - writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + XmlWriterUtil.writeFullElement(xmlw, "depositr", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.depositor)); /* ... and depDate comes before distDate - L.A. */ - writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); - writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + XmlWriterUtil.writeFullElement(xmlw, "depDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + XmlWriterUtil.writeFullElement(xmlw, "distDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.distributionDate)); xmlw.writeEndElement(); // diststmt writeSeriesElement(xmlw, version); xmlw.writeStartElement("holdings"); - writeAttribute(xmlw, "URI", pidUri); + XmlWriterUtil.writeAttribute(xmlw, "URI", pidUri); xmlw.writeEndElement(); //holdings xmlw.writeEndElement(); // citation @@ -247,7 +249,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeSubjectElement(xmlw, version, datasetDto.getMetadataLanguage()); //Subject and Keywords writeAbstractElement(xmlw, version, datasetDto.getMetadataLanguage()); // Description writeSummaryDescriptionElement(xmlw, version, datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.notesText)); //////// xmlw.writeEndElement(); // stdyInfo @@ -255,7 +257,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeDataAccess(xmlw , version); writeOtherStudyMaterial(xmlw , version); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); xmlw.writeEndElement(); // stdyDscr @@ -274,10 +276,10 @@ private static void writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersio return; } xmlw.writeStartElement("othrStdyMat"); - writeFullElementList(xmlw, "relMat", relMaterials); - writeFullElementList(xmlw, "relStdy", relDatasets); + XmlWriterUtil.writeFullElementList(xmlw, "relMat", relMaterials); + XmlWriterUtil.writeFullElementList(xmlw, "relStdy", relDatasets); writeRelPublElement(xmlw, version); - writeFullElementList(xmlw, "othRefs", relReferences); + XmlWriterUtil.writeFullElementList(xmlw, "othRefs", relReferences); xmlw.writeEndElement(); //othrStdyMat } @@ -292,29 +294,29 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver xmlw.writeStartElement("dataAccs"); xmlw.writeStartElement("setAvail"); - writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); - writeFullElement(xmlw, "origArch", version.getOriginalArchive()); - writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); - writeFullElement(xmlw, "collSize", 
version.getSizeOfCollection()); - writeFullElement(xmlw, "complete", version.getStudyCompletion()); + XmlWriterUtil.writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); + XmlWriterUtil.writeFullElement(xmlw, "origArch", version.getOriginalArchive()); + XmlWriterUtil.writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); + XmlWriterUtil.writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); + XmlWriterUtil.writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); //setAvail xmlw.writeStartElement("useStmt"); - writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); - writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); - writeFullElement(xmlw, "restrctn", version.getRestrictions()); - writeFullElement(xmlw, "contact", version.getContactForAccess()); - writeFullElement(xmlw, "citReq", version.getCitationRequirements()); - writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); - writeFullElement(xmlw, "conditions", version.getConditions()); - writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); + XmlWriterUtil.writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); + XmlWriterUtil.writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); + XmlWriterUtil.writeFullElement(xmlw, "restrctn", version.getRestrictions()); + XmlWriterUtil.writeFullElement(xmlw, "contact", version.getContactForAccess()); + XmlWriterUtil.writeFullElement(xmlw, "citReq", version.getCitationRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "conditions", version.getConditions()); + XmlWriterUtil.writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); xmlw.writeEndElement(); //useStmt /* any s: */ if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); - writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeAttribute("type", NOTE_TYPE_TERMS_OF_ACCESS); + xmlw.writeAttribute("level", LEVEL_DV); xmlw.writeCharacters(version.getTermsOfAccess()); xmlw.writeEndElement(); //notes } @@ -341,9 +343,9 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt @@ -351,11 +353,11 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase //The doc is always published by the Dataverse Repository if (!StringUtils.isEmpty(datasetDto.getPublisher())) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); // distrbtr } - writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); + 
XmlWriterUtil.writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt writeVersionStatement(xmlw, version); @@ -369,10 +371,10 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - writeAttribute(xmlw,"source","archive"); + XmlWriterUtil.writeAttribute(xmlw,"source","archive"); xmlw.writeStartElement("version"); - writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); - writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); + XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); + XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt @@ -523,14 +525,14 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset * "" entries, then all the "" ones: */ for (String nationEntry : nationList) { - writeFullElement(xmlw, "nation", nationEntry); + XmlWriterUtil.writeFullElement(xmlw, "nation", nationEntry); } for (String geogCoverEntry : geogCoverList) { - writeFullElement(xmlw, "geogCover", geogCoverEntry); + XmlWriterUtil.writeFullElement(xmlw, "geogCover", geogCoverEntry); } } - writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); + XmlWriterUtil.writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); /* Only 1 geoBndBox is allowed in the DDI. So, I'm just going to arbitrarily use the first one, and ignore the rest! -L.A. 
*/ @@ -563,16 +565,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset */ if (geoBndBoxMap.get("westBL") != null) { - writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); + XmlWriterUtil.writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); } if (geoBndBoxMap.get("eastBL") != null) { - writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); + XmlWriterUtil.writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); } if (geoBndBoxMap.get("southBL") != null) { - writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); + XmlWriterUtil.writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); } if (geoBndBoxMap.get("northBL") != null) { - writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); + XmlWriterUtil.writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); } xmlw.writeEndElement(); @@ -580,7 +582,7 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset /* analyUnit: */ if (unitOfAnalysisDTO != null) { - writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); } @@ -600,16 +602,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset private static void writeMultipleElement(XMLStreamWriter xmlw, String element, FieldDTO fieldDTO, String lang) throws XMLStreamException { for (String value : fieldDTO.getMultiplePrimitive()) { //Write multiple lang vals for controlled vocab, otherwise don't include any lang tag - writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); + XmlWriterUtil.writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? 
lang : null); } } private static void writeDateElement(XMLStreamWriter xmlw, String element, String cycle, String event, String dateIn) throws XMLStreamException { xmlw.writeStartElement(element); - writeAttribute(xmlw, "cycle", cycle); - writeAttribute(xmlw, "event", event); - writeAttribute(xmlw, "date", dateIn); + XmlWriterUtil.writeAttribute(xmlw, "cycle", cycle); + XmlWriterUtil.writeAttribute(xmlw, "event", event); + XmlWriterUtil.writeAttribute(xmlw, "date", dateIn); xmlw.writeCharacters(dateIn); xmlw.writeEndElement(); @@ -641,15 +643,15 @@ private static void writeDateElement(XMLStreamWriter xmlw, String element, Strin private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version, String lang) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); - writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); - writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); - writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); - writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); - writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); + XmlWriterUtil.writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); + XmlWriterUtil.writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); + XmlWriterUtil.writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); writeTargetSampleElement(xmlw, version); - writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); + XmlWriterUtil.writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); /* comes before : */ FieldDTO collModeFieldDTO = dto2FieldDTO(version, DatasetFieldConstant.collectionMode, "socialscience"); @@ -658,37 +660,37 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO // Below is a backward compatibility check allowing export to work in // an instance where the metadata block has not been updated yet. 
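// (Illustration, field values invented: with the updated socialscience block,
// collectionMode arrives as a multiple controlled-vocabulary field, e.g.
// ["Computer Assisted Personal Interview (CAPI)", "Mail questionnaire"],
// while a dataset created before the block update still carries a single
// free-text value such as "interview"; hence the branch below.)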
if (collModeFieldDTO.getMultiple()) { - writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); } else { - writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); } } /* and so does : */ - writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); xmlw.writeStartElement("sources"); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); - writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); - writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); + XmlWriterUtil.writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + XmlWriterUtil.writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); xmlw.writeEndElement(); //sources - writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); - writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); + XmlWriterUtil.writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); /* "" has the uppercase C: */ - writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); - writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); - writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); + XmlWriterUtil.writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl /* before : */ writeNotesElement(xmlw, version); xmlw.writeStartElement("anlyInfo"); - //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); - writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); - writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); - writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); + //XmlWriterUtil.writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); + XmlWriterUtil.writeI18NElement(xmlw, "EstSmpErr", version, 
DatasetFieldConstant.samplingErrorEstimates, lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo xmlw.writeEndElement();//method @@ -705,7 +707,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO if (CITATION_BLOCK_NAME.equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.subject.equals(fieldDTO.getTypeName())) { - writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", + XmlWriterUtil.writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", fieldDTO.getTypeClass(), "citation", lang); } @@ -732,14 +734,10 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!keywordValue.isEmpty()) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue(keywordValue, DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -753,13 +751,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // Keyword } @@ -792,14 +786,10 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), } if (!topicClassificationValue.isEmpty()) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue( topicClassificationValue, DatasetFieldConstant.topicClassValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -813,13 +803,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + 
XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // topcClas } @@ -857,7 +843,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); if(!authorAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",authorAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); } xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty @@ -880,7 +866,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); if(!contributorType.isEmpty()){ - writeAttribute(xmlw,"role", contributorType); + XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); } xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId @@ -922,10 +908,10 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); if(!datasetContactAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); } if(!datasetContactEmail.isEmpty()){ - writeAttribute(xmlw,"email",datasetContactEmail); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); } xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty @@ -969,14 +955,10 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); - if (!producerAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", producerAffiliation); - } - if (!producerAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", producerAbbreviation); - } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); /*if (!producerLogo.isEmpty()) { - writeAttribute(xmlw, "role", producerLogo); + XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); }*/ xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty @@ -987,7 +969,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } } - writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); + XmlWriterUtil.writeFullElement(xmlw, "prodDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.productionDate)); // productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the method for details) @@ -1033,17 +1015,11 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - if (!distributorAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", distributorAffiliation); - } - if (!distributorAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", distributorAbbreviation); - } - if (!distributorURL.isEmpty()) { - writeAttribute(xmlw, "URI", distributorURL); + xmlw.writeAttribute("xml:lang", lang); } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", distributorAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", distributorAbbreviation); 
+ XmlWriterUtil.writeAttribute(xmlw, "URI", distributorURL); xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } @@ -1102,7 +1078,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO (In other words - titlStmt is mandatory! -L.A.) */ xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", citation); + XmlWriterUtil.writeFullElement(xmlw, "titl", citation); if (IDNo != null && !IDNo.trim().equals("")) { xmlw.writeStartElement("IDNo"); @@ -1115,7 +1091,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO xmlw.writeEndElement(); // titlStmt - writeFullElement(xmlw,"biblCit",citation); + XmlWriterUtil.writeFullElement(xmlw,"biblCit",citation); xmlw.writeEndElement(); //citation if (url != null && !url.trim().equals("") ) { xmlw.writeStartElement("ExtLink"); @@ -1164,10 +1140,10 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); if(!descriptionDate.isEmpty()){ - writeAttribute(xmlw,"date",descriptionDate); + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); } if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); + xmlw.writeAttribute("xml:lang", lang); } xmlw.writeCharacters(descriptionText); xmlw.writeEndElement(); //abstract @@ -1201,7 +1177,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); if(!grantAgency.isEmpty()){ - writeAttribute(xmlw,"agency",grantAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); } xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno @@ -1235,7 +1211,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); if(!otherIdAgency.isEmpty()){ - writeAttribute(xmlw,"agency",otherIdAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); } xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo @@ -1269,7 +1245,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); if(!softwareVersion.isEmpty()){ - writeAttribute(xmlw,"version",softwareVersion); + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); } xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software @@ -1384,10 +1360,10 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); if(!notesType.isEmpty()){ - writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); } if(!notesSubject.isEmpty()){ - writeAttribute(xmlw,"subject",notesSubject); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); } xmlw.writeCharacters(notesText); xmlw.writeEndElement(); @@ -1412,14 +1388,14 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos // and observations, etc.) 
if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); + XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); String pidURL = fileDTo.getDataFile().getPidURL(); if (pidURL != null && !pidURL.isEmpty()){ - writeAttribute(xmlw, "URI", pidURL); + xmlw.writeAttribute("URI", pidURL); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileDTo.getDataFile().getFilename()); xmlw.writeEndElement(); // labl @@ -1430,9 +1406,9 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos String contentType = fileDTo.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } @@ -1460,14 +1436,14 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // and observations, etc.) if (!fileJson.containsKey("dataTables")) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileJson.getJsonNumber(("id").toString())); + xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString())); if (fileJson.containsKey("pidUrl")){ - writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); + XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileJson.getString("filename")); xmlw.writeEndElement(); // labl @@ -1482,9 +1458,9 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // specially formatted notes section: if (fileJson.containsKey("contentType")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(fileJson.getString("contentType")); xmlw.writeEndElement(); // notes } @@ -1502,33 +1478,7 @@ private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) xmlw.writeEndElement(); // txt } - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if 
(datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - return fieldDTO.getSinglePrimitive(); - } - } - } - return null; - } - - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - String rawVal = fieldDTO.getSinglePrimitive(); - if (fieldDTO.isControlledVocabularyField()) { - return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), - locale, false); - } - } - } - } - return null; - } + private static List dto2PrimitiveList(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { @@ -1562,104 +1512,6 @@ private static FieldDTO dto2FieldDTO(DatasetVersionDTO datasetVersionDTO, String return null; } - private static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { - //For the simplest Elements we can - if (values != null && !values.isEmpty()) { - for (String value : values) { - xmlw.writeStartElement(name); - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - } - - private static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, - String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) - throws XMLStreamException { - - if (values != null && !values.isEmpty()) { - Locale defaultLocale = Locale.getDefault(); - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); - if (localeVal != null) { - - value = localeVal; - writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); - } else { - writeFullElement(xmlw, name, value); - } - } else { - writeFullElement(xmlw, name, value); - } - } - if (lang != null && !defaultLocale.getLanguage().equals(lang)) { - // Get values in dataset metadata language - // Loop before testing fieldTypeClass to be ready for external CVV - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); - if (localeVal != null) { - writeFullElement(xmlw, name, localeVal, lang); - } - } - } - } - } - } - - private static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, - String fieldTypeName, String lang) throws XMLStreamException { - // Get the default value - String val = dto2Primitive(version, fieldTypeName); - Locale defaultLocale = Locale.getDefault(); - // Get the language-specific value for the default language - // A null value is returned if this is not a CVV field - String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); - String requestedLocaleVal = null; - if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { - // Also get the value in the requested locale/lang if that's not the default - // lang. 
- requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); - } - // FWIW locale-specific vals will only be non-null for CVV values (at present) - if (localeVal == null && requestedLocaleVal == null) { - // Not CVV/no translations so print without lang tag - writeFullElement(xmlw, name, val); - } else { - // Print in either/both languages if we have values - if (localeVal != null) { - // Print the value for the default locale with it's own lang tag - writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); - } - // Also print in the request lang (i.e. the metadata language for the dataset) if a value exists, print it with a lang tag - if (requestedLocaleVal != null) { - writeFullElement(xmlw, name, requestedLocaleVal, lang); - } - } - } - - private static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - writeFullElement(xmlw, name, value, null); - } - - private static void writeFullElement (XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { - //For the simplest Elements we can - if (!StringUtilisEmpty(value)) { - xmlw.writeStartElement(name); - if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - - private static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - if (!StringUtilisEmpty(value)) { - xmlw.writeAttribute(name, value); - } - } private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { @@ -1747,14 +1599,14 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t } private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException { xmlw.writeStartElement("varGrp"); - writeAttribute(xmlw, "ID", "VG" + varGrp.getJsonNumber("id").toString()); + xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString()); String vars = ""; JsonArray varsInGroup = varGrp.getJsonArray("dataVariableIds"); for (int j=0;j sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { xmlw.writeStartElement("sumStat"); - writeAttribute(xmlw, "type", sumStat.getKey()); + XmlWriterUtil.writeAttribute(xmlw, "type", sumStat.getKey()); xmlw.writeCharacters(((JsonString)sumStat.getValue()).getString()); xmlw.writeEndElement(); // sumStat } @@ -1917,7 +1769,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject varCat = varCats.getJsonObject(i); xmlw.writeStartElement("catgry"); if (varCat.getBoolean("isMissing")) { - writeAttribute(xmlw, "missing", "Y"); + xmlw.writeAttribute("missing", "Y"); } // catValu @@ -1928,7 +1780,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // label if (varCat.containsKey("label")) { xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "category"); + xmlw.writeAttribute("level", "category"); xmlw.writeCharacters(varCat.getString("label")); xmlw.writeEndElement(); // labl } @@ -1936,7 +1788,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // catStat if (varCat.containsKey("frequency")) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("type", "freq"); Double freq = varCat.getJsonNumber("frequency").doubleValue(); // if frequency is actually a long value, we want to write "100" instead of // "100.0" @@ 
-1955,8 +1807,8 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject cm = catMetas.getJsonObject(j); if (cm.getString("categoryValue").equals(varCat.getString("value"))) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "wgtd", "wgtd"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("wgtd", "wgtd"); + xmlw.writeAttribute("type", "freq"); xmlw.writeCharacters(cm.getJsonNumber("wFreq").toString()); xmlw.writeEndElement(); // catStat break; @@ -1972,24 +1824,24 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // varFormat xmlw.writeEmptyElement("varFormat"); if(dvar.containsKey("variableFormatType")) { - writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); + XmlWriterUtil.writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); } else { throw new XMLStreamException("Illegal Variable Format Type!"); } if(dvar.containsKey("format")) { - writeAttribute(xmlw, "formatname", dvar.getString("format")); + XmlWriterUtil.writeAttribute(xmlw, "formatname", dvar.getString("format")); } //experiment writeAttribute(xmlw, "schema", dv.getFormatSchema()); if(dvar.containsKey("formatCategory")) { - writeAttribute(xmlw, "category", dvar.getString("formatCategory")); + XmlWriterUtil.writeAttribute(xmlw, "category", dvar.getString("formatCategory")); } // notes if (dvar.containsKey("UNF") && !dvar.getString("UNF").isBlank()) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "subject", "Universal Numeric Fingerprint"); - writeAttribute(xmlw, "level", "variable"); - writeAttribute(xmlw, "type", "Dataverse:UNF"); + xmlw.writeAttribute("subject", "Universal Numeric Fingerprint"); + xmlw.writeAttribute("level", "variable"); + xmlw.writeAttribute("type", "Dataverse:UNF"); xmlw.writeCharacters(dvar.getString("UNF")); xmlw.writeEndElement(); //notes } @@ -2020,8 +1872,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) } xmlw.writeStartElement("fileDscr"); String fileId = fileJson.getJsonNumber("id").toString(); - writeAttribute(xmlw, "ID", "f" + fileId); - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileId); + xmlw.writeAttribute("ID", "f" + fileId); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileId); xmlw.writeStartElement("fileTxt"); xmlw.writeStartElement("fileName"); @@ -2064,9 +1916,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) // (Universal Numeric Fingerprint) signature: if ((dt!=null) && (dt.containsKey("UNF") && !dt.getString("UNF").isBlank())) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_UNF); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_UNF); + xmlw.writeAttribute("subject", NOTE_SUBJECT_UNF); xmlw.writeCharacters(dt.getString("UNF")); xmlw.writeEndElement(); // notes } @@ -2075,9 +1927,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) JsonArray tags = fileJson.getJsonArray("tabularTags"); for (int j = 0; j < tags.size(); j++) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_TAG); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_TAG); + xmlw.writeAttribute("subject", 
NOTE_SUBJECT_TAG); xmlw.writeCharacters(tags.getString(j)); xmlw.writeEndElement(); // notes } @@ -2091,13 +1943,7 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) - private static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - return true; - } public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java new file mode 100644 index 00000000000..e932307d3d0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -0,0 +1,174 @@ +package edu.harvard.iq.dataverse.util.xml; + +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; + +public class XmlWriterUtil { + + public static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { + // For the simplest Elements we can + if (values != null && !values.isEmpty()) { + for (String value : values) { + xmlw.writeStartElement(name); + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + } + + public static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, + String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) + throws XMLStreamException { + + if (values != null && !values.isEmpty()) { + Locale defaultLocale = Locale.getDefault(); + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); + if (localeVal != null) { + + value = localeVal; + writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); + } else { + writeFullElement(xmlw, name, value); + } + } else { + writeFullElement(xmlw, name, value); + } + } + if (lang != null && !defaultLocale.getLanguage().equals(lang)) { + // Get values in dataset metadata language + // Loop before testing fieldTypeClass to be ready for external CVV + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); + if (localeVal != null) { + writeFullElement(xmlw, name, localeVal, lang); + } + } + } + } + } + } + + public static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, + String fieldTypeName, String lang) throws XMLStreamException { + // Get the default value + String val = dto2Primitive(version, fieldTypeName); + Locale defaultLocale = Locale.getDefault(); + // Get the language-specific value for the default language + // A null value is returned if this is not a CVV field + String localeVal = dto2Primitive(version, fieldTypeName, 
defaultLocale);
+        String requestedLocaleVal = null;
+        if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) {
+            // Also get the value in the requested locale/lang if that's not the default
+            // lang.
+            requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang));
+        }
+        // FWIW locale-specific vals will only be non-null for CVV values (at present)
+        if (localeVal == null && requestedLocaleVal == null) {
+            // Not CVV/no translations so print without lang tag
+            writeFullElement(xmlw, name, val);
+        } else {
+            // Print in either/both languages if we have values
+            if (localeVal != null) {
+                // Print the value for the default locale with its own lang tag
+                writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage());
+            }
+            // Also print in the request lang (i.e. the metadata language for the dataset)
+            // if a value exists, print it with a lang tag
+            if (requestedLocaleVal != null) {
+                writeFullElement(xmlw, name, requestedLocaleVal, lang);
+            }
+        }
+    }
+
+    public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) {
+        for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) {
+            MetadataBlockDTO value = entry.getValue();
+            for (FieldDTO fieldDTO : value.getFields()) {
+                if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) {
+                    return fieldDTO.getSinglePrimitive();
+                }
+            }
+        }
+        return null;
+    }
+
+    public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) {
+        for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) {
+            MetadataBlockDTO value = entry.getValue();
+            for (FieldDTO fieldDTO : value.getFields()) {
+                if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) {
+                    String rawVal = fieldDTO.getSinglePrimitive();
+                    if (fieldDTO.isControlledVocabularyField()) {
+                        return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(),
+                                locale, false);
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+    public static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException {
+        writeFullElement(xmlw, name, value, null);
+    }
+
+    public static void writeFullElement(XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException {
+        // For the simplest Elements we can
+        if (!StringUtils.isEmpty(value)) {
+            xmlw.writeStartElement(name);
+            if (DvObjectContainer.isMetadataLanguageSet(lang)) {
+                writeAttribute(xmlw, "xml:lang", lang);
+            }
+            xmlw.writeCharacters(value);
+            xmlw.writeEndElement(); // labl
+        }
+    }
+
+    public static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException {
+        if (!StringUtils.isEmpty(value)) {
+            xmlw.writeAttribute(name, value);
+        }
+    }
+
+    public static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException {
+        if (!elementAdded) {
+            xmlw.writeStartElement(elementName);
+        }
+
+        return true;
+    }
+
+    public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map<String, String> attributeMap, String value) throws XMLStreamException {
+        if (!StringUtils.isEmpty(value)) {
+            xmlw.writeStartElement(name);
+            for (String key : attributeMap.keySet()) {
+                writeAttribute(xmlw, key, attributeMap.get(key));
+            }
+            xmlw.writeCharacters(value);
+            xmlw.writeEndElement(); // labl
+        }
+    }
+
+    public static boolean writeOpenTagIfNeeded(XMLStreamWriter xmlw, String tag, boolean element_check) throws XMLStreamException {
+        //
check if the current tag isn't opened + if (!element_check) { + xmlw.writeStartElement(tag); // + } + return true; + } +} From 1a46155a5ed37545455a194650301cbee5691358 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:32:21 -0400 Subject: [PATCH 083/622] note duplicate method --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49ceabc5900..820ced3d6c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -1428,6 +1428,8 @@ public static void writeFundingReferencesElement(XMLStreamWriter xmlw, DatasetVe writeEndTag(xmlw, fundingReference_check); } + + //Duplicates XmlWriterUtil.dto2Primitive private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { // give the single value of the given metadata for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { From ace656ce890d6bd4ecb1b7000995e0934a2c214e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:33:13 -0400 Subject: [PATCH 084/622] remove xml template doc, refactor to generate xml, adding OA fields --- .../pidproviders/doi/XmlMetadataTemplate.java | 819 +++++++++++++----- .../doi/datacite_metadata_template.xml | 2 +- 2 files changed, 617 insertions(+), 204 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 30e4dfd79cc..8a5fe9f9d32 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1,208 +1,599 @@ package edu.harvard.iq.dataverse.pidproviders.doi; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URL; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; -import java.util.logging.Level; +import java.util.Map; +import java.util.Optional; import java.util.logging.Logger; +import java.util.stream.Collectors; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.ocpsoft.common.util.Strings; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.api.dto.DatasetDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import 
edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; +import jakarta.json.JsonObject; public class XmlMetadataTemplate { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; - - static { - try (InputStream in = XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } + private static final Logger logger = Logger.getLogger(XmlMetadataTemplate.class.getName()); - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } + public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; + public static final String XML_SCHEMA_VERSION = "4.5"; - public void setProducers(List producers) { - this.producers = producers; - } + private DoiMetadata doiMetadata; - public List getContacts() { - return contacts; + public XmlMetadataTemplate() { } - public void setContacts(List contacts) { - this.contacts = contacts; + public XmlMetadataTemplate(DoiMetadata doiMetadata) { + this.doiMetadata = doiMetadata; } - public String getDescription() { - return description; + public String generateXML(DvObject dvObject) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + generateXML(dvObject, outputStream); + + String xml = outputStream.toString(); + return XmlPrinter.prettyPrintXml(xml); + } catch (XMLStreamException | IOException e) { + logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); + e.printStackTrace(); + } + return null; } - public void setDescription(String description) { - this.description = description; - } + private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { + // Could/should use dataset metadata language for metadata from DvObject itself? + String language = null; // machine locale? e.g. for Publisher which is global + String metadataLanguage = null; // when set, otherwise = language? 
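// A minimal usage sketch of this refactored class (assuming a DoiMetadata bean
// populated by the caller, as the constructor above suggests; the variable
// names are illustrative, not from this patch):
//
//     XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata);
//     String dataCiteXml = template.generateXML(dataset); // pretty-printed via XmlPrinter
//
// The writer below then produces a DataCite 4.5 root element of roughly this shape:
//
//     <resource xmlns="http://datacite.org/schema/kernel-4"
//               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
//               xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd">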
+ XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); + xmlw.writeStartElement("resource"); + xmlw.writeDefaultNamespace(XML_NAMESPACE); + xmlw.writeAttribute("xmlns:xsi", XML_XSI); + xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); + + writeIdentifier(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors()); + writeTitles(xmlw, dvObject, language); + writePublisher(xmlw, dvObject); + writePublicationYear(xmlw, dvObject); + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + writeResourceType(xmlw, dvObject); + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + writeDescriptions(xmlw, dvObject); + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); - public List getAuthors() { - return authors; - } + StringBuilder contributorsElement = new StringBuilder(); + if (doiMetadata.getContacts() != null) { + for (String[] contact : doiMetadata.getContacts()) { + if (!contact[0].isEmpty()) { + contributorsElement.append("" + + StringEscapeUtils.escapeXml10(contact[0]) + ""); + if (!contact[1].isEmpty()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(contact[1]) + ""); + } + contributorsElement.append(""); + } + } + } - public void setAuthors(List authors) { - this.authors = authors; - } + if (doiMetadata.getProducers() != null) { + for (String[] producer : doiMetadata.getProducers()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[0]) + + ""); + if (!producer[1].isEmpty()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[1]) + ""); + } + contributorsElement.append(""); + } + } - public XmlMetadataTemplate() { - } + String relIdentifiers = generateRelatedIdentifiers(dvObject); - public List getDatafileIdentifiers() { - return datafileIdentifiers; } - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - public XmlMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); + /** + * 3, Title(s) (with optional type sub-properties) (M) + * + * @param xmlw + * The Stream writer + * @param dvObject + * The dataset/file + * @param language + * the metadata language + * @return + * @throws XMLStreamException + */ + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { + String title = doiMetadata.getTitle(); + String subTitle = null; + List altTitles = null; + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersion(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = 
dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst();
+            if (altTitleField.isPresent()) {
+                altTitles = altTitleField.get().getValues();
+            }
         }
-        Elements titleElements = doc.select("title");
-        if (titleElements.size() > 0) {
-            title = titleElements.get(0).html();
+
+        if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) {
+            xmlw.writeStartElement("titles");
+            XmlWriterUtil.writeFullElement(xmlw, "title", title, language);
+
+            Map<String, String> attributes = new HashMap<String, String>();
+            attributes.put("titleType", "Subtitle");
+
+            // the subtitle value goes under titleType="Subtitle"
+            XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, subTitle);
+
+            attributes.clear();
+            attributes.put("titleType", "AlternativeTitle");
+
+            if (altTitles != null) {
+                for (String altTitle : altTitles) {
+                    XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle);
+                }
+            }
+
+            xmlw.writeEndElement(); // </titles>
         }
-        Elements publisherElements = doc.select("publisher");
-        if (publisherElements.size() > 0) {
-            publisher = publisherElements.get(0).html();
+    }
+
+    /**
+     * 1, Identifier (with mandatory type sub-property) (M) Note DataCite expects
+     * identifierType="DOI" but OpenAire allows several others (see
+     * https://guidelines.readthedocs.io/en/latest/data/field_identifier.html#d-identifiertype)
+     * Dataverse is currently only capable of creating DOI, Handle, or URL types
+     * from the OpenAire list (the last from PermaLinks). ToDo - If we add, e.g., an
+     * ARK or PURL provider, this code has to change or we'll need to refactor so
+     * that the identifiertype and id value can be sent via the JSON/ORE
+     *
+     * @param xmlw
+     *            The stream writer
+     * @param dvObject
+     *            The dataset or file with the PID
+     * @throws XMLStreamException
+     */
+    private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException {
+        GlobalId pid = dvObject.getGlobalId();
+        // identifier with identifierType attribute
+        Map<String, String> identifier_map = new HashMap<String, String>();
+        String identifierType = null;
+        String identifier = null;
+        switch (pid.getProtocol()) {
+        case AbstractDOIProvider.DOI_PROTOCOL:
+            identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase();
+            identifier = pid.asRawIdentifier();
+            break;
+        case HandlePidProvider.HDL_PROTOCOL:
+            identifierType = "Handle";
+            identifier = pid.asRawIdentifier();
+            break;
+        case PermaLinkPidProvider.PERMA_PROTOCOL:
+            identifierType = "URL";
+            identifier = pid.asURL();
+            break;
         }
-        Elements publisherYearElements = doc.select("publicationYear");
-        if (publisherYearElements.size() > 0) {
-            publisherYear = publisherYearElements.get(0).html();
+        Map<String, String> attributeMap = new HashMap<String, String>();
+        attributeMap.put("identifierType", identifierType);
+        XmlWriterUtil.writeFullElementWithAttributes(xmlw, "identifier", attributeMap, identifier);
+    }
+
+    /**
+     * 2, Creator (with optional given name, family name, name identifier and
+     * affiliation sub-properties) (M)
+     *
+     * @param xmlw
+     *            The stream writer
+     * @param authorList
+     *            - the list of authors
+     * @throws XMLStreamException
+     */
+    public void writeCreators(XMLStreamWriter xmlw, List<DatasetAuthor> authorList) throws XMLStreamException {
+        // creators -> creator -> creatorName with nameType attribute, givenName,
+        // familyName, nameIdentifier
+        // write all creators
+        xmlw.writeStartElement("creators"); // <creators>
+
+        if (authorList != null && !authorList.isEmpty()) {
+            for (DatasetAuthor author : authorList) {
+                String creatorName =
StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); + String affiliation = null; + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + affiliation = StringEscapeUtils.escapeXml10(author.getAffiliation().getDisplayValue()); + } + String nameIdentifier = null; + String nameIdentifierScheme = null; + if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { + nameIdentifier = author.getIdValue(); + if(nameIdentifier != null) { + // Normalizes to the URL form of the identifier, returns null if the identifier + // is not valid given the type + nameIdentifier = author.getIdentifierAsUrl(); + } + nameIdentifierScheme = author.getIdType(); + } + + if (StringUtils.isNotBlank(creatorName)) { + xmlw.writeStartElement("creator"); // + JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, + StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + + writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); + xmlw.writeEndElement(); // + } + + else { + // Authors unavailable + XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + } + } } + xmlw.writeEndElement(); // } - public String generateXML(DvObject dvObject) { + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // publisher should already be non null - :unav if it wasn't available + XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); + } + + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; + String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (this.publisherYear != null) { + if (doiMetadata.getPublisherYear() != null) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. 
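// Since DataCite validates publicationYear against the pattern [\d]{4} (per the
// error message quoted above), any fallback must itself be a four-digit string;
// hence "9999" rather than "UNKNOWN" when no year is available.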
- publisherYearFinal = this.publisherYear; + pubYear = doiMetadata.getPublisherYear(); } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) - .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors != null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() - && !author.getIdValue().isEmpty() && author.getAffiliation() != null - && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement - .append("" + author.getAffiliation().getDisplayValue() + ""); + XmlWriterUtil.writeFullElement(xmlw, "publicationYear", String.valueOf(pubYear)); + } + + /** + * 6, Subject (with scheme sub-property) R + * + * @param xmlw + * The Steam writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // subjects -> subject with subjectScheme and schemeURI attributes when + // available + boolean subjectsCreated = false; + List subjects = null; + List compoundKeywords = null; + List compoundTopics = null; + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + dv.getDatasetSubjects(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { + compoundKeywords = dsf.getDatasetFieldCompoundValues(); + } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { + compoundTopics = dsf.getDatasetFieldCompoundValues(); } - creatorsElement.append(""); } - } else { - creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) - .append(""); + } else if (dvObject instanceof DataFile df) { + subjects = df.getTagLabels(); + } + for (String subject : subjects) { + if (StringUtils.isNotBlank(subject)) { + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElement(xmlw, "subject", StringEscapeUtils.escapeXml10(subject)); + } } + for (DatasetFieldCompoundValue keywordFieldValue : compoundKeywords) { + String keyword = null; + String scheme = null; + String schemeUri = null; + + for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.keyword: + keyword = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(keyword)) { + Map attributesMap = new 
HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); + } + } + for (DatasetFieldCompoundValue topicFieldValue : compoundTopics) { + String topic = null; + String scheme = null; + String schemeUri = null; + + for (DatasetField subField : topicFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.topicClassValue: + topic = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(topic)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(topic)); + } + } + if (subjectsCreated) { + xmlw.writeEndElement(); + } + } - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + /** + * 7, Contributor (with optional given name, family name, name identifier + * and affiliation sub-properties) + * + * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, java.lang.String) + * + * @param xmlw The stream writer + * @param dvObject The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean contributorsCreated = false; + List compoundProducers = null; + List compoundDistributors = null; + List compoundContacts = null; + List compoundContributors = null; + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + //ToDo Include for files? 
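// The disabled block below sketches one possible answer to the ToDo above:
// resolving a DataFile to its owning Dataset, so that file-level records would
// inherit the dataset's producer/distributor/contact/contributor entries.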
+ /*if(dvObject instanceof DataFile df) { + dvObject = df.getOwner(); + }*/ + + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producer: + compoundProducers = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.distributor: + compoundDistributors = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contact: + compoundContacts = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contributor: + compoundContributors = dsf.getDatasetFieldCompoundValues(); + } + } + } + + + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { + String producer = null; + String affiliation = null; + + for (DatasetField subField : producerFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producerName: + producer = subField.getValue(); + break; + case DatasetFieldConstant.producerAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(producer)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(producer, false, false); + writeEntityElements(xmlw, "contributor", "Producer", entityObject, affiliation, null, null); + } - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); + } + + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { + String distributor = null; + String affiliation = null; + + for (DatasetField subField : distributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributorName: + distributor = subField.getValue(); + break; + case DatasetFieldConstant.distributorAffiliation: + affiliation = subField.getValue(); + break; } } + if (StringUtils.isNotBlank(distributor)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(distributor, false, false); + writeEntityElements(xmlw, "contributor", "Distributor", entityObject, affiliation, null, null); + } + } + for (DatasetFieldCompoundValue contactFieldValue : compoundContacts) { + String contact = null; + String affiliation = null; + + for (DatasetField subField : contactFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.datasetContactName: + contact = subField.getValue(); + break; + case DatasetFieldConstant.datasetContactAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(contact)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contact, false, false); + writeEntityElements(xmlw, "contributor", "ContactPerson", entityObject, affiliation, null, null); + } - if (this.getProducers() != null) { - for (String[] producer : 
this.getProducers()) { - contributorsElement.append("" + producer[0] - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); + } + for (DatasetFieldCompoundValue contributorFieldValue : compoundContributors) { + String contributor = null; + String contributorType = null; + + for (DatasetField subField : contributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.contributorName: + contributor = subField.getValue(); + break; + case DatasetFieldConstant.contributorType: + contributorType = subField.getValue().replace(" ", ""); + break; } - contributorsElement.append(""); } + // QDR - doesn't have Funder in the contributor type list. + // Using a string isn't i18n + if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contributor, false, false); + writeEntityElements(xmlw, "contributor", contributorType, entityObject, null, null, null); + } + + } + + if (contributorsCreated) { + xmlw.writeEndElement(); } + } - String relIdentifiers = generateRelatedIdentifiers(dvObject); + private void writeEntityElements(XMLStreamWriter xmlw, String elementName, String type, JsonObject entityObject, String affiliation, String nameIdentifier, String nameIdentifierScheme) throws XMLStreamException { + xmlw.writeStartElement(elementName); + Map attributeMap = new HashMap(); + if (StringUtils.isNotBlank(type)) { + attributeMap.put("contributorType", type); + } + // person name=, + if (entityObject.getBoolean("isPerson")) { + attributeMap.put("nameType", "Personal"); + } else { + attributeMap.put("nameType", "Organizational"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, elementName + "Name", attributeMap, + StringEscapeUtils.escapeXml10(entityObject.getString("fullName"))); + if (entityObject.containsKey("givenName")) { + XmlWriterUtil.writeFullElement(xmlw, "givenName", StringEscapeUtils.escapeXml10(entityObject.getString("givenName"))); + } + if (entityObject.containsKey("familyName")) { + XmlWriterUtil.writeFullElement(xmlw, "familyName", StringEscapeUtils.escapeXml10(entityObject.getString("familyName"))); + } - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + if (nameIdentifier != null) { + attributeMap.clear(); + URL url; + try { + url = new URL(nameIdentifier); + String protocol = url.getProtocol(); + String authority = url.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + attributeMap.put("schemeURI", site); + attributeMap.put("nameIdentifierScheme", nameIdentifierScheme); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "nameIdentifier", attributeMap, nameIdentifier); + } catch (MalformedURLException e) { + logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); + } + } + + if (StringUtils.isNotBlank(affiliation)) { + attributeMap.clear(); + if (affiliation.startsWith("https://ror.org/")) { - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("affiliationIdentifierScheme", "ROR"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "affiliation", attributeMap, StringEscapeUtils.escapeXml10(affiliation)); + } + xmlw.writeEndElement(); 
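// N.B. writeCreators(...) above also calls writeStartElement("creator") before
// invoking this method, which itself opens elementName at the top, so creators
// would currently be emitted doubly nested (<creator><creator>...); presumably
// one of the two calls is meant to go away. Setting that aside, the intended
// output for a personal author with an ORCID looks roughly like this (values
// invented for illustration):
//
//     <creator>
//         <creatorName nameType="Personal">Doe, Jane</creatorName>
//         <givenName>Jane</givenName>
//         <familyName>Doe</familyName>
//         <nameIdentifier schemeURI="https://orcid.org"
//                         nameIdentifierScheme="ORCID">https://orcid.org/0000-0000-0000-0000</nameIdentifier>
//         <affiliation>Example University</affiliation>
//     </creator>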
} private String generateRelatedIdentifiers(DvObject dvObject) { @@ -210,9 +601,67 @@ private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); if (dvObject.isInstanceofDataset()) { Dataset dataset = (Dataset) dvObject; + + List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); + if (!relatedPublications.isEmpty()) { + for (DatasetRelPublication relatedPub : relatedPublications) { + String pubIdType = relatedPub.getIdType(); + String identifier = relatedPub.getIdNumber(); + /* + * Note - with identifier and url fields, it's not clear that there's a single + * way those two fields are used for all identifier types In QDR, at this time, + * doi and isbn types always have the raw number in the identifier field, + * whereas there are examples where URLs are in the identifier or url fields. + * The code here addresses those practices and is not generic. + */ + if (pubIdType != null) { + switch (pubIdType) { + case "doi": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "DOI", "IsSupplementTo", "doi:" + identifier); + } + break; + case "isbn": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "ISBN", "IsSupplementTo", "ISBN:" + identifier); + } + break; + case "url": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "URL", "IsSupplementTo", identifier); + } else { + String pubUrl = relatedPub.getUrl(); + if (pubUrl != null && pubUrl.length() > 0) { + appendIdentifier(sb, "URL", "IsSupplementTo", pubUrl); + } + } + break; + default: + if (identifier != null && identifier.length() != 0) { + if (pubIdType.equalsIgnoreCase("arXiv")) { + pubIdType = "arXiv"; + } else if (pubIdType.equalsIgnoreCase("handle")) { + // Initial cap required for handle + pubIdType = "Handle"; + } else if (!pubIdType.equals("bibcode")) { + pubIdType = pubIdType.toUpperCase(); + } + // For all others, do a generic attempt to match the identifier type to the + // datacite schema and send the raw identifier as the value + appendIdentifier(sb, pubIdType, "IsSupplementTo", identifier); + } + break; + } + + } else { + logger.info(relatedPub.getIdNumber() + relatedPub.getUrl() + relatedPub.getTitle()); + } + } + } + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - datafileIdentifiers = new ArrayList<>(); + List datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { if (dataFile.getGlobalId() != null) { if (sb.toString().isEmpty()) { @@ -229,14 +678,23 @@ private String generateRelatedIdentifiers(DvObject dvObject) { } } else if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" - + df.getOwner().getGlobalId() + ""); - sb.append(""); + appendIdentifier(sb, "DOI", "IsPartOf", df.getOwner().getGlobalId().asString()); + if (sb.length() != 0) { + // Should always be true + sb.append(""); + } } return sb.toString(); } + + private void appendIdentifier(StringBuilder sb, String idType, String relationType, String identifier) { + if (sb.toString().isEmpty()) { + sb.append(""); + } + sb.append("" + identifier + ""); + } + public void generateFileIdentifiers(DvObject dvObject) { if (dvObject.isInstanceofDataset()) { @@ -244,71 +702,26 @@ public void generateFileIdentifiers(DvObject dvObject) { if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - datafileIdentifiers = new ArrayList<>(); + List 
datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - + template.substring(x, template.length() - 1); + // int x = xmlMetadata.indexOf("") - 1; + // xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", + // dataFile.getIdentifier()); + // xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + // + template.substring(x, template.length() - 1); } } else { - xmlMetadata = xmlMetadata.replace( - "${relatedIdentifier}", - ""); + // xmlMetadata = xmlMetadata.replace( + // "${relatedIdentifier}", + // ""); } } } - public static String getTemplate() { - return template; - } - - public static void setTemplate(String template) { - XmlMetadataTemplate.template = template; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } - } \ No newline at end of file diff --git a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml index abe7ce79972..8348691d4c7 100644 --- a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml +++ b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml @@ -1,5 +1,5 @@ - ${identifier} From dba03e2bb1597d4e01317139d950e305d0d9dec5 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:33:40 -0400 Subject: [PATCH 085/622] refactor source of XML info --- .../iq/dataverse/export/DataCiteExporter.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java index 8caf32b2df0..c21d6b5cd1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java @@ -7,6 +7,7 @@ import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import io.gdcc.spi.export.XMLExporter; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.OutputStream; @@ -20,11 +21,7 @@ */ @AutoService(Exporter.class) public class DataCiteExporter implements XMLExporter { - - private static String DEFAULT_XML_NAMESPACE = "http://datacite.org/schema/kernel-3"; - private static String DEFAULT_XML_SCHEMALOCATION = "http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"; - private static String DEFAULT_XML_VERSION = "3.0"; - + public static final String NAME = 
"Datacite"; @Override @@ -60,17 +57,17 @@ public Boolean isAvailableToUsers() { @Override public String getXMLNameSpace() { - return DataCiteExporter.DEFAULT_XML_NAMESPACE; + return XmlMetadataTemplate.XML_NAMESPACE; } @Override public String getXMLSchemaLocation() { - return DataCiteExporter.DEFAULT_XML_SCHEMALOCATION; + return XmlMetadataTemplate.XML_SCHEMA_LOCATION; } @Override public String getXMLSchemaVersion() { - return DataCiteExporter.DEFAULT_XML_VERSION; + return XmlMetadataTemplate.XML_SCHEMA_VERSION; } } From af3e24b0b7bc1bff2c378f2a682455fe6aef0ee2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:34:09 -0400 Subject: [PATCH 086/622] add code to get raw alphanumeric pid value --- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index a542cb52ac0..1c8783c5bd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -100,6 +100,13 @@ public String asURL() { } return null; } + + public String asRawIdentifier() { + if (protocol == null || authority == null || identifier == null) { + return ""; + } + return authority + separator + identifier; + } From fa23884647c893285e456d749a741d6d36ac90eb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 13:58:32 -0400 Subject: [PATCH 087/622] remove duplicate method --- .../edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java index e932307d3d0..8ec426ead1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -145,13 +145,6 @@ public static void writeAttribute(XMLStreamWriter xmlw, String name, String valu } } - public static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - - return true; - } public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { if (!StringUtils.isEmpty(value)) { From 0d22d6c580df4aa689b019dfdc88321a59e02e4d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 13:59:22 -0400 Subject: [PATCH 088/622] dates, resourceType, alternate Ids --- .../pidproviders/doi/XmlMetadataTemplate.java | 224 +++++++++++++++++- 1 file changed, 215 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8a5fe9f9d32..92bf7afd273 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -6,12 +6,14 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -26,6 +28,8 @@ import 
org.jsoup.select.Elements; import org.ocpsoft.common.util.Strings; +import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; +import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; @@ -33,9 +37,11 @@ import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldValue; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; @@ -207,8 +213,6 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag */ private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { GlobalId pid = dvObject.getGlobalId(); - // identifier with identifierType attribute - Map identifier_map = new HashMap(); String identifierType = null; String identifier = null; switch (pid.getProtocol()) { @@ -315,9 +319,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt // subjects -> subject with subjectScheme and schemeURI attributes when // available boolean subjectsCreated = false; - List subjects = null; - List compoundKeywords = null; - List compoundTopics = null; + List subjects = new ArrayList(); + List compoundKeywords = new ArrayList(); + List compoundTopics = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields if (dvObject instanceof Dataset d) { @@ -419,10 +423,10 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt */ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { boolean contributorsCreated = false; - List compoundProducers = null; - List compoundDistributors = null; - List compoundContacts = null; - List compoundContributors = null; + List compoundProducers = new ArrayList(); + List compoundDistributors = new ArrayList(); + List compoundContacts = new ArrayList(); + List compoundContributors = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields //ToDo Include for files? 
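// Initializing these collections to empty lists here (they were null in patch
// 082) avoids NullPointerExceptions in the loops below that iterate them
// unconditionally, e.g. "for (String subject : subjects)" in writeSubjects.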
@@ -596,6 +600,208 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin xmlw.writeEndElement(); } + /** + * 8, Date (with type sub-property) (R) + * + * @param xmlw The Steam writer + * @param dvObject The dataset/datafile + * @throws XMLStreamException + */ + private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean datesWritten = false; + String dateOfDistribution = null; + String dateOfProduction = null; + String dateOfDeposit = null; + Date releaseDate = null; + List datesOfCollection = new ArrayList(); + + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + releaseDate = dv.getReleaseTime(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributionDate: + dateOfDistribution = dsf.getValue(); + break; + case DatasetFieldConstant.productionDate: + dateOfProduction = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfDeposit: + dateOfDeposit = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfCollection: + datesOfCollection = dsf.getDatasetFieldCompoundValues(); + } + } + } + Map attributes = new HashMap(); + if (StringUtils.isNotBlank(dateOfDistribution)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Issued"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDistribution); + } + // dates -> date with dateType attribute + + if (StringUtils.isNotBlank(dateOfProduction)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Created"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfProduction); + } + if (StringUtils.isNotBlank(dateOfDeposit)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Submitted"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDeposit); + } + + if (releaseDate != null) { + String date = Util.getDateTimeFormat().format(releaseDate); + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Available"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, date); + } + if (datesOfCollection != null) { + for (DatasetFieldCompoundValue collectionDateFieldValue : datesOfCollection) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.dateOfCollectionStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.dateOfCollectionEnd: + endDate = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Collected"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (datesWritten) { + xmlw.writeEndElement(); + } + } + + + // 9, Language (MA), language + private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + //Currently not supported. Spec indicates one 'primary' language. 
// Could send the first entry in DatasetFieldConstant.language, or send it iff there is only one entry, and/or default to the machine's default lang?
+        return;
+    }
+
+    // 10, ResourceType (with mandatory general type
+    // description sub-property) (M)
+    private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException {
+        List<ControlledVocabularyValue> kindOfDataValues = new ArrayList<ControlledVocabularyValue>();
+        Map<String, String> attributes = new HashMap<String, String>();
+
+        attributes.put("resourceTypeGeneral", "Dataset");
+        if (dvObject instanceof Dataset d) {
+            DatasetVersion dv = d.getLatestVersionForCopy();
+            for (DatasetField dsf : dv.getDatasetFields()) {
+                switch (dsf.getDatasetFieldType().getName()) {
+                case DatasetFieldConstant.kindOfData:
+                    kindOfDataValues = dsf.getControlledVocabularyValues();
+                    break;
+                }
+            }
+        }
+        // Write the resourceType element exactly once, after scanning the fields
+        if (kindOfDataValues.isEmpty()) {
+            // Write an attribute only element if there are no kindOfData values.
+            xmlw.writeStartElement("resourceType");
+            xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral"));
+            xmlw.writeEndElement();
+        } else {
+            for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) {
+                String resourceType = kindOfDataValue.getStrValue();
+                if (StringUtils.isNotBlank(resourceType)) {
+                    XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType);
+                }
+            }
+        }
+    }
+
+    /**
+     * 11, AlternateIdentifier (with type sub-property) (O)
+     *
+     * @param xmlw The stream writer
+     * @param dvObject The dataset/datafile
+     * @throws XMLStreamException
+     */
+    private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException {
+        List<DatasetFieldCompoundValue> otherIdentifiers = new ArrayList<DatasetFieldCompoundValue>();
+        Set<AlternativePersistentIdentifier> altPids = dvObject.getAlternativePersistentIndentifiers();
+
+        boolean alternatesWritten = false;
+
+        Map<String, String> attributes = new HashMap<String, String>();
+        if (dvObject instanceof Dataset d) {
+            DatasetVersion dv = d.getLatestVersionForCopy();
+            for (DatasetField dsf : dv.getDatasetFields()) {
+                if (DatasetFieldConstant.otherId.equals(dsf.getDatasetFieldType().getName())) {
+                    otherIdentifiers = dsf.getDatasetFieldCompoundValues();
+                    break;
+                }
+            }
+        }
+        if (!altPids.isEmpty()) {
+            alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten);
+        }
+        for (AlternativePersistentIdentifier altPid : altPids) {
+            String identifierType = null;
+            String identifier = null;
+            switch (altPid.getProtocol()) {
+            case AbstractDOIProvider.DOI_PROTOCOL:
+                identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase();
+                identifier = altPid.getAuthority() + "/" + altPid.getIdentifier();
+                break;
+            case HandlePidProvider.HDL_PROTOCOL:
+                identifierType = "Handle";
+                identifier = altPid.getAuthority() + "/" + altPid.getIdentifier();
+                break;
+            default:
+                // The AlternativePersistentIdentifier class isn't really ready for anything but
+                // doi or handle pids, but will add this as a default.
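// (Assumption: ":unav" here matches AbstractPidProvider.UNAVAILABLE, the
// placeholder already used above for missing creators; the constant's exact
// value is not shown in this patch.)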
+ identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternativeIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + + } + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { + String identifierType = null; + String identifier = null; + for (DatasetField subField : otherIdentifier.getChildDatasetFields()) { + identifierType = ":unav"; + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.otherIdAgency: + identifierType = subField.getValue(); + break; + case DatasetFieldConstant.otherIdValue: + identifier = subField.getValue(); + break; + } + } + attributes.put("alternativeIdentifierType", identifierType); + if (!StringUtils.isBlank(identifier)) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } + } + if (alternatesWritten) { + xmlw.writeEndElement(); + } + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); From d69bf414f3cc3b5fc8e0214b0c5c4fc6f7ec155e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 8 May 2024 13:19:56 -0400 Subject: [PATCH 089/622] more methods --- .../pidproviders/doi/XmlMetadataTemplate.java | 313 +++++++++++++----- 1 file changed, 223 insertions(+), 90 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 92bf7afd273..eb2465257a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -47,6 +47,7 @@ import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; @@ -146,7 +147,6 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM } - /** * 3, Title(s) (with optional type sub-properties) (M) * @@ -802,132 +802,265 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } - private String generateRelatedIdentifiers(DvObject dvObject) { + /** + * 12, RelatedIdentifier (with type and relation type sub-properties) (R) + * + * @param xmlw The Steam writer + * @param dvObject the dataset/datafile + * @throws XMLStreamException + */ + private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean relatedIdentifiersWritten = false; + + Map attributes = new HashMap(); - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + if (dvObject instanceof Dataset dataset) { List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); if (!relatedPublications.isEmpty()) { for (DatasetRelPublication relatedPub : relatedPublications) { + attributes.clear(); + String pubIdType = relatedPub.getIdType(); String identifier = 
relatedPub.getIdNumber(); + String url = relatedPub.getUrl(); /* * Note - with identifier and url fields, it's not clear that there's a single - * way those two fields are used for all identifier types In QDR, at this time, - * doi and isbn types always have the raw number in the identifier field, - * whereas there are examples where URLs are in the identifier or url fields. - * The code here addresses those practices and is not generic. + * way those two fields are used for all identifier types. The code here is + * ~best effort to interpret those fields. */ - if (pubIdType != null) { - switch (pubIdType) { - case "doi": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "DOI", "IsSupplementTo", "doi:" + identifier); - } - break; - case "isbn": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "ISBN", "IsSupplementTo", "ISBN:" + identifier); - } - break; - case "url": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "URL", "IsSupplementTo", identifier); - } else { - String pubUrl = relatedPub.getUrl(); - if (pubUrl != null && pubUrl.length() > 0) { - appendIdentifier(sb, "URL", "IsSupplementTo", pubUrl); - } - } - break; - default: - if (identifier != null && identifier.length() != 0) { - if (pubIdType.equalsIgnoreCase("arXiv")) { - pubIdType = "arXiv"; - } else if (pubIdType.equalsIgnoreCase("handle")) { - // Initial cap required for handle - pubIdType = "Handle"; - } else if (!pubIdType.equals("bibcode")) { - pubIdType = pubIdType.toUpperCase(); - } - // For all others, do a generic attempt to match the identifier type to the - // datacite schema and send the raw identifier as the value - appendIdentifier(sb, pubIdType, "IsSupplementTo", identifier); - } - break; + pubIdType = getCanonicalPublicationType(pubIdType); + + // Prefer url if set, otherwise check identifier + String relatedIdentifier = url; + if (StringUtils.isBlank(relatedIdentifier)) { + relatedIdentifier = identifier; + } + // For types where we understand the protocol, get the canonical form + switch (pubIdType) { + case "DOI": + if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; } + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + break; + default: + + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URL(relatedIdentifier); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (MalformedURLException e) { + // Just an identifier + } + } - } else { - logger.info(relatedPub.getIdNumber() + relatedPub.getUrl() + relatedPub.getTitle()); + if (StringUtils.isNotBlank(relatedIdentifier)) { + // Still have a valid entry + attributes.put("relatedIdentifierType", pubIdType); + 
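// N.B. the startsWith checks in the DOI and Handle cases above parse as
// (!hasPrefix || isHttp), which would prepend "doi:"/"hdl:" even to values that
// are already URLs; the intent is presumably !(hasPrefix || isHttp). With a
// valid DOI, the element written just below comes out roughly as (identifier
// invented for illustration):
//
//     <relatedIdentifier relatedIdentifierType="DOI" relationType="IsSupplementTo">10.5072/FK2/EXAMPLE</relatedIdentifier>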
attributes.put("relationType", "IsSupplementTo"); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); } } } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - List datafileIdentifiers = new ArrayList<>(); + attributes.clear(); + attributes.put("relationType", "HasPart"); for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append(""); + GlobalId pid = dataFile.getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); } - sb.append("" - + dataFile.getGlobalId() + ""); } } - - if (!sb.toString().isEmpty()) { - sb.append(""); - } } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - appendIdentifier(sb, "DOI", "IsPartOf", df.getOwner().getGlobalId().asString()); - if (sb.length() != 0) { - // Should always be true - sb.append(""); + } else if (dvObject instanceof DataFile df) { + GlobalId pid = df.getOwner().getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + + attributes.clear(); + attributes.put("relationType", "IsPartOf"); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); + } } } - return sb.toString(); + if (relatedIdentifiersWritten) { + xmlw.writeEndElement(); + } } - private void appendIdentifier(StringBuilder sb, String idType, String relationType, String identifier) { - if (sb.toString().isEmpty()) { - sb.append(""); + static HashMap relatedIdentifierTypeMap = new HashMap(); + + private static String getCanonicalPublicationType(String pubIdType) { + if (relatedIdentifierTypeMap.isEmpty()) { + relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); + relatedIdentifierTypeMap.put("arXiv", "arXiv"); + relatedIdentifierTypeMap.put("bibcode".toLowerCase(), "bibcode"); + relatedIdentifierTypeMap.put("DOI".toLowerCase(), "DOI"); + relatedIdentifierTypeMap.put("EAN13".toLowerCase(), "EAN13"); + relatedIdentifierTypeMap.put("EISSN".toLowerCase(), "EISSN"); + relatedIdentifierTypeMap.put("Handle".toLowerCase(), "Handle"); + relatedIdentifierTypeMap.put("IGSN".toLowerCase(), "IGSN"); + relatedIdentifierTypeMap.put("ISBN".toLowerCase(), "ISBN"); + relatedIdentifierTypeMap.put("ISSN".toLowerCase(), "ISSN"); + relatedIdentifierTypeMap.put("ISTC".toLowerCase(), "ISTC"); + relatedIdentifierTypeMap.put("LISSN".toLowerCase(), "LISSN"); + relatedIdentifierTypeMap.put("LSID".toLowerCase(), "LSID"); + relatedIdentifierTypeMap.put("PISSN".toLowerCase(), "PISSN"); + relatedIdentifierTypeMap.put("PMID".toLowerCase(), "PMID"); + relatedIdentifierTypeMap.put("PURL".toLowerCase(), "PURL"); + relatedIdentifierTypeMap.put("UPC".toLowerCase(), "UPC"); + relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); + relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); + 
relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); + // Add entry for Handle protocol so this can be used with GlobalId/getProtocol() + relatedIdentifierTypeMap.put("hdl".toLowerCase(), "Handle"); } - sb.append("" + identifier + ""); + return relatedIdentifierTypeMap.get(pubIdType); } - public void generateFileIdentifiers(DvObject dvObject) { + private void writeSize(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // sizes -> size + boolean sizesWritten = false; + List dataFiles = new ArrayList(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + Long size = dataFile.getFilesize(); + if (size != -1) { + sizesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "sizes", sizesWritten); + XmlWriterUtil.writeFullElement(xmlw, "size", size.toString()); + } + } + } + if (sizesWritten) { + xmlw.writeEndElement(); + } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + } - List datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - // int x = xmlMetadata.indexOf("") - 1; - // xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", - // dataFile.getIdentifier()); - // xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - // + template.substring(x, template.length() - 1); + private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean formatsWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + String format = dataFile.getContentType(); + if (StringUtils.isNotBlank(format)) { + formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); + XmlWriterUtil.writeFullElement(xmlw, "format", format); } + /* Should original formats be sent? What about original sizes above? 
+ if(dataFile.isTabularData()) { + String originalFormat = dataFile.getOriginalFileFormat(); + if(StringUtils.isNotBlank(originalFormat)) { + XmlWriterUtil.writeFullElement(xmlw, "format", format); + } + }*/ + } + } + if (formatsWritten) { + xmlw.writeEndElement(); + } + + } - } else { - // xmlMetadata = xmlMetadata.replace( - // "${relatedIdentifier}", - // ""); + private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + Dataset d = null; + if(dvObject instanceof Dataset) { + d = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + d = ((DataFile) dvObject).getOwner(); + } + if(d !=null) { + DatasetVersion dv = d.getLatestVersionForCopy(); + String version = dv.getFriendlyVersionNumber(); + if (StringUtils.isNotBlank(version)) { + XmlWriterUtil.writeFullElement(xmlw, "version", version); } } + } + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) { + // rightsList -> rights with rightsURI attribute + xmlw.writeStartElement("rightsList"); // + + // set terms from the info:eu-repo-Access-Terms vocabulary + writeRightsHeader(xmlw, language); + boolean restrict = false; + boolean closed = false; + + if (datasetVersionDTO.isFileAccessRequest()) { + restrict = true; + } + if (datasetVersionDTO.getFiles() != null) { + for (int i = 0; i < datasetVersionDTO.getFiles().size(); i++) { + if (datasetVersionDTO.getFiles().get(i).isRestricted()) { + closed = true; + break; + } + } + } + + if (restrict && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); + } else if (!restrict && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); + } else { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess"); + } + xmlw.writeEndElement(); // + + writeRightsHeader(xmlw, language); + if (datasetVersionDTO.getLicense() != null) { + xmlw.writeAttribute("rightsURI", datasetVersionDTO.getLicense().getUri()); + xmlw.writeCharacters(datasetVersionDTO.getLicense().getName()); + } + xmlw.writeEndElement(); // + xmlw.writeEndElement(); // + } } \ No newline at end of file From 04b367f641fe8e8da77c8eceafd7a012985f9a1f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:41:07 -0400 Subject: [PATCH 090/622] only one field to look for --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 943693355a3..d723cf3d528 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1344,6 +1344,7 @@ public List getGeographicCoverage() { } } + break; } return geoCoverages; } From 003431dde79bc7b80077c1aa6d0998329e85f4e3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:41:30 -0400 Subject: [PATCH 091/622] use common util method --- .../edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 0c861cb6c09..c0e3057696a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1715,12 +1715,12 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // 
range.getBeginValueType().getName().equals(DB_VAR_RANGE_TYPE_POINT)) { if (range.getBoolean("hasBeginValueType") && range.getBoolean("isBeginValueTypePoint")) { if (range.containsKey("beginValue")) { - invalrngAdded = XmlWriterUtil.checkParentElement(xmlw, "invalrng", invalrngAdded); + invalrngAdded = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("item"); XmlWriterUtil.writeAttribute(xmlw, "VALUE", range.getString("beginValue")); } } else { - invalrngAdded = XmlWriterUtil.checkParentElement(xmlw, "invalrng", invalrngAdded); + invalrngAdded = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("range"); if (range.getBoolean("hasBeginValueType") && range.containsKey("beginValue")) { if (range.getBoolean("isBeginValueTypeMin")) { From fea2f5e01d9a9e3d37f1714e3aaba8dc32f84ab0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:42:29 -0400 Subject: [PATCH 092/622] access rights descriptions, geolocations, funding refs --- .../pidproviders/doi/XmlMetadataTemplate.java | 297 +++++++++++++++--- 1 file changed, 249 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index eb2465257a1..be55b7a4837 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -41,16 +41,20 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.export.DDIExporter; +import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; @@ -117,34 +121,6 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM writeDescriptions(xmlw, dvObject); writeGeoLocations(xmlw, dvObject); writeFundingReferences(xmlw, dvObject); - - StringBuilder contributorsElement = new StringBuilder(); - if (doiMetadata.getContacts() != null) { - for (String[] contact : doiMetadata.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + StringEscapeUtils.escapeXml10(contact[0]) + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(contact[1]) + ""); - } - contributorsElement.append(""); - } - } - } - - if (doiMetadata.getProducers() != null) { - for (String[] producer : doiMetadata.getProducers()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[0]) - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + 
StringEscapeUtils.escapeXml10(producer[1]) + "</affiliation>");
- }
- contributorsElement.append("</contributor>");
- }
- }
-
- String relIdentifiers = generateRelatedIdentifiers(dvObject);
- }

/**
@@ -1025,42 +1001,267 @@ private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr

}

- private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) {
+ private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException {
// rightsList -> rights with rightsURI attribute
xmlw.writeStartElement("rightsList"); // <rightsList>

// set terms from the info:eu-repo-Access-Terms vocabulary
- writeRightsHeader(xmlw, language);
- boolean restrict = false;
+ xmlw.writeStartElement("rights"); // <rights>
+ DatasetVersion dv = null;
boolean closed = false;
+ if (dvObject instanceof Dataset d) {
+ dv = d.getLatestVersionForCopy();
+ closed = dv.isHasRestrictedFile();
+ } else if (dvObject instanceof DataFile df) {
+ dv = df.getOwner().getLatestVersionForCopy();

- if (datasetVersionDTO.isFileAccessRequest()) {
- restrict = true;
- }
- if (datasetVersionDTO.getFiles() != null) {
- for (int i = 0; i < datasetVersionDTO.getFiles().size(); i++) {
- if (datasetVersionDTO.getFiles().get(i).isRestricted()) {
- closed = true;
- break;
- }
- }
+ closed = df.isRestricted();
}
+ TermsOfUseAndAccess terms = dv.getTermsOfUseAndAccess();
+ boolean requestsAllowed = terms.isFileAccessRequest();
+ License license = terms.getLicense();

- if (restrict && closed) {
+ if (requestsAllowed && closed) {
xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess");
- } else if (!restrict && closed) {
+ } else if (!requestsAllowed && closed) {
xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess");
} else {
xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess");
}
xmlw.writeEndElement(); // </rights>
+ xmlw.writeStartElement("rights"); // <rights>

- writeRightsHeader(xmlw, language);
- if (datasetVersionDTO.getLicense() != null) {
- xmlw.writeAttribute("rightsURI", datasetVersionDTO.getLicense().getUri());
- xmlw.writeCharacters(datasetVersionDTO.getLicense().getName());
+ if (license != null) {
+ xmlw.writeAttribute("rightsURI", license.getUri().toString());
+ xmlw.writeCharacters(license.getName());
+ } else {
+ xmlw.writeAttribute("rightsURI", DatasetUtil.getLicenseURI(dv));
+ xmlw.writeCharacters(BundleUtil.getStringFromBundle("license.custom.description"));
+ ;
}
xmlw.writeEndElement(); // </rights>
+ xmlw.writeEndElement(); // </rightsList>
+ }
+
+ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException {
+ // descriptions -> description with descriptionType attribute
+ boolean descriptionsWritten = false;
+ List<String> descriptions = null;
+ DatasetVersion dv = null;
+
+ if (dvObject instanceof Dataset d) {
+ dv = d.getLatestVersionForCopy();
+ descriptions = dv.getDescriptions();
+ } else if (dvObject instanceof DataFile df) {
+ String description = df.getDescription();
+ if (description != null) {
+ descriptions = new ArrayList<String>();
+ descriptions.add(description);
+ }
+ }
+ Map<String, String> attributes = new HashMap<String, String>();
+ attributes.put("descriptionType", "Abstract");
+ for (String description : descriptions) {
+ descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+ XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description);
+ ;
+ }
+
+ if (dv != null) {
+ List<DatasetField> dsfs = dv.getDatasetFields();
+
+ for (DatasetField dsf : dsfs) {
+
+ switch (dsf.getDatasetFieldType().getName()) {
+ case DatasetFieldConstant.software:
+ attributes.clear();
+ attributes.put("descriptionType", "TechnicalInfo");
+ List<DatasetFieldCompoundValue> dsfcvs = dsf.getDatasetFieldCompoundValues();
+ for (DatasetFieldCompoundValue dsfcv : dsfcvs) {
+
+ String softwareName = null;
+ String softwareVersion = null;
+ List<DatasetField> childDsfs = dsfcv.getChildDatasetFields();
+ for (DatasetField childDsf : childDsfs) {
+ if (DatasetFieldConstant.softwareName.equals(childDsf.getDatasetFieldType().getName())) {
+ softwareName = childDsf.getValue();
+ } else if (DatasetFieldConstant.softwareVersion.equals(childDsf.getDatasetFieldType().getName())) {
+ softwareVersion = childDsf.getValue();
+ }
+ }
+ if (StringUtils.isNotBlank(softwareName)) {
+ if (StringUtils.isNotBlank(softwareVersion)) {
+ softwareName = softwareName + ", " + softwareVersion;
+ }
+ descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+ XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName);
+ }
+ }
+ break;
+ case DatasetFieldConstant.originOfSources:
+ case DatasetFieldConstant.characteristicOfSources:
+ case DatasetFieldConstant.accessToSources:
+ attributes.clear();
+ attributes.put("descriptionType", "Methods");
+ String method = dsf.getValue();
+ if (StringUtils.isNotBlank(method)) {
+ descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+ XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, method);
+
+ }
+ break;
+ case DatasetFieldConstant.series:
+ attributes.clear();
+ attributes.put("descriptionType", "SeriesInformation");
+ dsfcvs = dsf.getDatasetFieldCompoundValues();
+ for (DatasetFieldCompoundValue dsfcv : dsfcvs) {
+ List<DatasetField> childDsfs = dsfcv.getChildDatasetFields();
+ for (DatasetField childDsf : childDsfs) {
+
+ if (DatasetFieldConstant.seriesInformation.equals(childDsf.getDatasetFieldType().getName())) {
+ String seriesInformation = childDsf.getValue();
+ if (StringUtils.isNotBlank(seriesInformation)) {
+ descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+ XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, seriesInformation);
+ }
+ break;
+ }
+ }
+ }
+ break;
+ case DatasetFieldConstant.notesText:
+ attributes.clear();
+ attributes.put("descriptionType", "Other");
+ String notesText = dsf.getValue();
+ if (StringUtils.isNotBlank(notesText)) {
+ descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+ XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, notesText);
+ }
+ break;
+
+ }
+ }
+
+ }
+
+ if (descriptionsWritten) {
+ xmlw.writeEndElement(); // </descriptions>
+ }
+ }
+
+ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException {
+ if (dvObject instanceof Dataset d) {
+ boolean geoLocationsWritten = false;
+ DatasetVersion dv = d.getLatestVersionForCopy();
+
+ List<String[]> places = dv.getGeographicCoverage();
+ if (places != null && !places.isEmpty()) {
+ // geoLocationPlace
+ geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten);
+
+ for (String[] place : places) {
+ ArrayList<String> placeList = new ArrayList<String>();
+ for (String placePart : place) {
+ placeList.add(placePart);
+ }
+ XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", "));
+ }
+ }
+ boolean boundingBoxFound = false;
+ boolean productionPlaceFound = false;
+ for (DatasetField dsf :
dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.geographicBoundingBox: + boundingBoxFound = true; + for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) { + List childDsfs = dsfcv.getChildDatasetFields(); + String nLatitude = null; + String sLatitude = null; + String eLongitude = null; + String wLongitude = null; + for (DatasetField childDsf : childDsfs) { + switch (childDsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.northLatitude: + nLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.southLatitude: + sLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.eastLongitude: + eLongitude = childDsf.getValue(); + break; + case DatasetFieldConstant.westLongitude: + wLongitude = childDsf.getValue(); + + } + } + if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { + // A point + xmlw.writeStartElement("geoLocationPoint"); + XmlWriterUtil.writeFullElement(xmlw, "pointLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "pointLatitude", sLatitude); + xmlw.writeEndElement(); + } else { + // A box + xmlw.writeStartElement("geoLocationBox"); + XmlWriterUtil.writeFullElement(xmlw, "westBoundLongitude", wLongitude); + XmlWriterUtil.writeFullElement(xmlw, "eastBoundLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "southBoundLatitude", sLatitude); + XmlWriterUtil.writeFullElement(xmlw, "northBoundLatitude", nLatitude); + xmlw.writeEndElement(); + + } + } + } + case DatasetFieldConstant.productionPlace: + productionPlaceFound = true; + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + List prodPlaces = dsf.getValues(); + for (String prodPlace : prodPlaces) { + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + } + break; + } + if (boundingBoxFound && productionPlaceFound) { + break; + } + } + if (geoLocationsWritten) { + xmlw.writeEndElement(); // + } + } + + } + + + private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // fundingReferences -> fundingReference -> funderName, awardNumber + boolean fundingReferenceWritten = false; + DatasetVersion dv = null; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + } + if (dv != null) { + List funders = dv.getFunders(); + if (!funders.isEmpty()) { + + for (String funder : funders) { + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + xmlw.writeEndElement(); // + } + } + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + } + } } } \ No newline at end of file From 3c52b6a2031a55c8840948681930a1824b02820b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 17 May 2024 15:36:18 -0400 Subject: [PATCH 093/622] altTitles npe --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java 
b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index be55b7a4837..a2c744be2ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -138,7 +138,7 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { String title = doiMetadata.getTitle(); String subTitle = null; - List altTitles = null; + List altTitles = new ArrayList<>(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersion(); From bab2a0d270b766916dfd9578fd25ab05332f0958 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 18 May 2024 12:25:07 -0400 Subject: [PATCH 094/622] fixes and test --- .../pidproviders/doi/XmlMetadataTemplate.java | 64 ++++++----- .../doi/datacite/XmlMetadataTemplateTest.java | 108 ++++++++++++++++++ 2 files changed, 144 insertions(+), 28 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a2c744be2ed..e9b7b0faa26 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -21,6 +21,7 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; + import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -84,6 +85,7 @@ public String generateXML(DvObject dvObject) { generateXML(dvObject, outputStream); String xml = outputStream.toString(); + logger.info(xml); return XmlPrinter.prettyPrintXml(xml); } catch (XMLStreamException | IOException e) { logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); @@ -98,10 +100,11 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM String metadataLanguage = null; // when set, otherwise = language? 
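// Note on the change below: the added writeDefaultNamespace(XML_NAMESPACE) call emits a
// default xmlns declaration on <resource>, so it and all child elements land in the
// DataCite kernel-4 namespace; without it the serialized record would not validate
// against the DataCite XSD.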
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); + xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - + writeIdentifier(xmlw, dvObject); writeCreators(xmlw, doiMetadata.getAuthors()); writeTitles(xmlw, dvObject, language); @@ -121,6 +124,8 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM writeDescriptions(xmlw, dvObject); writeGeoLocations(xmlw, dvObject); writeFundingReferences(xmlw, dvObject); + xmlw.writeEndElement(); + xmlw.flush(); } /** @@ -726,32 +731,34 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } } - if (!altPids.isEmpty()) { + + if (altPids != null && !altPids.isEmpty()) { alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); - } - for (AlternativePersistentIdentifier altPid : altPids) { - String identifierType = null; - String identifier = null; - switch (altPid.getProtocol()) { - case AbstractDOIProvider.DOI_PROTOCOL: - identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); - identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); - break; - case HandlePidProvider.HDL_PROTOCOL: - identifierType = "Handle"; - identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); - break; - default: - // The AlternativePersistentIdentifier class isn't really ready for anything but - // doi or handle pids, but will add this as a default. - identifierType = ":unav"; - identifier = altPid.getAuthority() + altPid.getIdentifier(); - break; - } - attributes.put("alternativeIdentifierType", identifierType); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. 
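// (":unav" is the conventional substitute value for "value unavailable, possibly
// unknown", which is presumably why it serves as the catch-all identifier type here.)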
+ identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternativeIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } } + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { String identifierType = null; String identifier = null; @@ -1061,10 +1068,11 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X } Map attributes = new HashMap(); attributes.put("descriptionType", "Abstract"); - for (String description : descriptions) { - descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); - ; + if (descriptions != null) { + for (String description : descriptions) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + } } if (dv != null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java new file mode 100644 index 00000000000..e576398a474 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -0,0 +1,108 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.ExtendWith; + +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; + +@ExtendWith(MockitoExtension.class) +@LocalJvmSettings +@JvmSetting(key = JvmSettings.SITE_URL, value = "https://example.com") + +public class XmlMetadataTemplateTest { + + static DataverseServiceBean dataverseSvc; + static SettingsServiceBean settingsSvc; + static PidProviderFactoryBean pidService; + static final String DEFAULT_NAME = "LibraScholar"; + + @BeforeAll + public static void setupMocks() { + dataverseSvc = 
Mockito.mock(DataverseServiceBean.class); + settingsSvc = Mockito.mock(SettingsServiceBean.class); + BrandingUtil.injectServices(dataverseSvc, settingsSvc); + + // initial values (needed here for other tests where this method is reused!) + Mockito.when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(DEFAULT_NAME); + Mockito.when(dataverseSvc.getRootDataverseName()).thenReturn(DEFAULT_NAME); + + pidService = Mockito.mock(PidProviderFactoryBean.class); + Mockito.when(pidService.isGlobalIdLocallyUnique(any(GlobalId.class))).thenReturn(true); + Mockito.when(pidService.getProducer()).thenReturn("RootDataverse"); + + } + + /** + */ + @Test + public void testDataCiteXMLCreation() throws IOException { + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setTitle("A Title"); + List creators = new ArrayList(); + creators.add("Alice"); + creators.add("Bob"); + doiMetadata.setCreators(creators); + doiMetadata.setPublisher("Dataverse"); + XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); + + Dataset d = new Dataset(); + GlobalId doi = new GlobalId("doi", "10.5072", "FK2/ABCDEF", null, null, null); + d.setGlobalId(doi); + DatasetVersion dv = new DatasetVersion(); + TermsOfUseAndAccess toa = new TermsOfUseAndAccess(); + toa.setTermsOfUse("Some terms"); + dv.setTermsOfUseAndAccess(toa); + dv.setDataset(d); + DatasetFieldType primitiveDSFType = new DatasetFieldType(DatasetFieldConstant.title, + DatasetFieldType.FieldType.TEXT, false); + DatasetField testDatasetField = new DatasetField(); + + dv.setVersionState(VersionState.DRAFT); + + testDatasetField.setDatasetVersion(dv); + testDatasetField.setDatasetFieldType(primitiveDSFType); + testDatasetField.setSingleValue("First Title"); + List fields = new ArrayList<>(); + fields.add(testDatasetField); + dv.setDatasetFields(fields); + ArrayList dsvs = new ArrayList<>(); + dsvs.add(0, dv); + d.setVersions(dsvs); + + String xml = template.generateXML(d); + System.out.println("Output is " + xml); + + } + +} From 3cca63d2f6ff4052852876d9ccfe52424d2da615 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 14:55:46 -0400 Subject: [PATCH 095/622] fix for empty rel pub entry --- .../pidproviders/doi/XmlMetadataTemplate.java | 179 +++++++++--------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e9b7b0faa26..8725feca546 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -21,7 +21,6 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; - import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -97,14 +96,14 @@ public String generateXML(DvObject dvObject) { private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { // Could/should use dataset metadata language for metadata from DvObject itself? String language = null; // machine locale? e.g. for Publisher which is global - String metadataLanguage = null; // when set, otherwise = language? + String metadataLanguage = null; // when set, otherwise = language? 
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); - + xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - + writeIdentifier(xmlw, dvObject); writeCreators(xmlw, doiMetadata.getAuthors()); writeTitles(xmlw, dvObject, language); @@ -242,7 +241,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) String nameIdentifierScheme = null; if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { nameIdentifier = author.getIdValue(); - if(nameIdentifier != null) { + if (nameIdentifier != null) { // Normalizes to the URL form of the identifier, returns null if the identifier // is not valid given the type nameIdentifier = author.getIdentifierAsUrl(); @@ -392,14 +391,16 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt } /** - * 7, Contributor (with optional given name, family name, name identifier - * and affiliation sub-properties) + * 7, Contributor (with optional given name, family name, name identifier and + * affiliation sub-properties) * * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, - * java.lang.String, java.lang.String, java.lang.String) + * java.lang.String, java.lang.String, java.lang.String) * - * @param xmlw The stream writer - * @param dvObject The Dataset/DataFile + * @param xmlw + * The stream writer + * @param dvObject + * The Dataset/DataFile * @throws XMLStreamException */ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -410,11 +411,11 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X List compoundContributors = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields - //ToDo Include for files? - /*if(dvObject instanceof DataFile df) { - dvObject = df.getOwner(); - }*/ - + // ToDo Include for files? + /* + * if(dvObject instanceof DataFile df) { dvObject = df.getOwner(); } + */ + if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersionForCopy(); for (DatasetField dsf : dv.getDatasetFields()) { @@ -433,8 +434,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } } - - + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { String producer = null; String affiliation = null; @@ -457,7 +457,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } - + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { String distributor = null; String affiliation = null; @@ -517,7 +517,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X break; } } - // QDR - doesn't have Funder in the contributor type list. + // QDR - doesn't have Funder in the contributor type list. 
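// (Funder-typed contributors are deliberately skipped here; they surface instead through
// writeFundingReferences, which a later patch in this series extends to read the
// contributor field.)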
// Using a string isn't i18n if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); @@ -526,7 +526,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } - + if (contributorsCreated) { xmlw.writeEndElement(); } @@ -568,7 +568,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); } } - + if (StringUtils.isNotBlank(affiliation)) { attributeMap.clear(); if (affiliation.startsWith("https://ror.org/")) { @@ -584,8 +584,10 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin /** * 8, Date (with type sub-property) (R) * - * @param xmlw The Steam writer - * @param dvObject The dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile * @throws XMLStreamException */ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -668,15 +670,16 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } - // 9, Language (MA), language private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - //Currently not supported. Spec indicates one 'primary' language. Could send the first entry in DatasetFieldConstant.language or send iff there is only one entry, and/or default to the machine's default lang? + // Currently not supported. Spec indicates one 'primary' language. Could send + // the first entry in DatasetFieldConstant.language or send iff there is only + // one entry, and/or default to the machine's default lang? 
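// A minimal sketch (illustrative only, not part of this patch) of the
// "send only when there is exactly one entry" option described above:
//
//     if (dvObject instanceof Dataset d) {
//         for (DatasetField dsf : d.getLatestVersionForCopy().getDatasetFields()) {
//             if (DatasetFieldConstant.language.equals(dsf.getDatasetFieldType().getName())) {
//                 List<String> langs = dsf.getValues();
//                 if (langs.size() == 1) {
//                     XmlWriterUtil.writeFullElement(xmlw, "language", langs.get(0));
//                 }
//             }
//         }
//     }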
return; } - - // 10, ResourceType (with mandatory general type - // description sub- property) (M) + + // 10, ResourceType (with mandatory general type + // description sub- property) (M) private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { List kindOfDataValues = new ArrayList(); Map attributes = new HashMap(); @@ -711,8 +714,10 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X /** * 11 AlternateIdentifier (with type sub-property) (O) * - * @param xmlw The Steam writer - * @param dvObject The dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile * @throws XMLStreamException */ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -731,7 +736,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } } - + if (altPids != null && !altPids.isEmpty()) { alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); for (AlternativePersistentIdentifier altPid : altPids) { @@ -788,8 +793,10 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) /** * 12, RelatedIdentifier (with type and relation type sub-properties) (R) * - * @param xmlw The Steam writer - * @param dvObject the dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * the dataset/datafile * @throws XMLStreamException */ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -813,6 +820,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th * way those two fields are used for all identifier types. The code here is * ~best effort to interpret those fields. 
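* For example, under this interpretation a related DOI entered as
* https://doi.org/10.5072/FK2/EXAMPLE or as doi:10.5072/FK2/EXAMPLE (hypothetical
* values) should both normalize to the raw form 10.5072/FK2/EXAMPLE, while a
* non-URL identifier type supplied as a URL is split into a schemeURI attribute
* plus the local part.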
*/ + pubIdType = getCanonicalPublicationType(pubIdType); // Prefer url if set, otherwise check identifier @@ -821,49 +829,52 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th relatedIdentifier = identifier; } // For types where we understand the protocol, get the canonical form - switch (pubIdType) { - case "DOI": - if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { - relatedIdentifier = "doi:" + relatedIdentifier; - } - try { - GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); - relatedIdentifier = pid.asRawIdentifier(); - } catch (IllegalArgumentException e) { - relatedIdentifier = null; - } - break; - case "Handle": - if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { - relatedIdentifier = "hdl:" + relatedIdentifier; - } - try { - GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); - relatedIdentifier = pid.asRawIdentifier(); - } catch (IllegalArgumentException e) { - relatedIdentifier = null; - } - break; - case "URL": - break; - default: - - // For non-URL types, if a URL is given, split the string to get a schemeUri - try { - URL relatedUrl = new URL(relatedIdentifier); - String protocol = relatedUrl.getProtocol(); - String authority = relatedUrl.getAuthority(); - String site = String.format("%s://%s", protocol, authority); - relatedIdentifier = relatedIdentifier.substring(site.length()); - attributes.put("schemeURI", site); - } catch (MalformedURLException e) { - // Just an identifier + if (pubIdType != null) { + switch (pubIdType) { + case "DOI": + if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + break; + default: + + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URL(relatedIdentifier); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (MalformedURLException e) { + // Just an identifier + } } } - if (StringUtils.isNotBlank(relatedIdentifier)) { // Still have a valid entry - attributes.put("relatedIdentifierType", pubIdType); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + } attributes.put("relationType", "IsSupplementTo"); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); @@ -905,7 +916,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th static HashMap relatedIdentifierTypeMap = new HashMap(); - + private static String getCanonicalPublicationType(String pubIdType) { if (relatedIdentifierTypeMap.isEmpty()) { 
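// Note: this lazy population of a shared static HashMap is unsynchronized, so
// concurrent first calls may interleave. Every entry is a constant, so a static
// initializer (or an immutable Map.of copy) would give the same result thread-safely.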
relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); @@ -976,13 +987,12 @@ private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); XmlWriterUtil.writeFullElement(xmlw, "format", format); } - /* Should original formats be sent? What about original sizes above? - if(dataFile.isTabularData()) { - String originalFormat = dataFile.getOriginalFileFormat(); - if(StringUtils.isNotBlank(originalFormat)) { - XmlWriterUtil.writeFullElement(xmlw, "format", format); - } - }*/ + /* + * Should original formats be sent? What about original sizes above? + * if(dataFile.isTabularData()) { String originalFormat = + * dataFile.getOriginalFileFormat(); if(StringUtils.isNotBlank(originalFormat)) + * { XmlWriterUtil.writeFullElement(xmlw, "format", format); } } + */ } } if (formatsWritten) { @@ -993,19 +1003,19 @@ private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { Dataset d = null; - if(dvObject instanceof Dataset) { + if (dvObject instanceof Dataset) { d = (Dataset) dvObject; } else if (dvObject instanceof DataFile) { d = ((DataFile) dvObject).getOwner(); } - if(d !=null) { + if (d != null) { DatasetVersion dv = d.getLatestVersionForCopy(); - String version = dv.getFriendlyVersionNumber(); + String version = dv.getFriendlyVersionNumber(); if (StringUtils.isNotBlank(version)) { XmlWriterUtil.writeFullElement(xmlw, "version", version); } } - + } private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -1244,7 +1254,6 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } - private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // fundingReferences -> fundingReference -> funderName, awardNumber boolean fundingReferenceWritten = false; From 30c80a9a5a27c51d8ca8130375d358aadb447a4f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 15:32:13 -0400 Subject: [PATCH 096/622] bugs: remove bad nesting, dupe values --- .../pidproviders/doi/XmlMetadataTemplate.java | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8725feca546..74da57094c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -158,20 +158,22 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { xmlw.writeStartElement("titles"); - XmlWriterUtil.writeFullElement(xmlw, "title", title, language); - + if (StringUtils.isNotBlank(title)) { + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + } Map attributes = new HashMap(); - attributes.put("titleType", "Subtitle"); - - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, title); - - attributes.clear(); - attributes.put("titleType", "AlternativeTitle"); - for (String altTitle : altTitles) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + if 
(StringUtils.isNotBlank(subTitle)) { + attributes.put("titleType", "Subtitle"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, subTitle); + } + if ((altTitles != null && !String.join("", altTitles).isBlank())) { + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } } - xmlw.writeEndElement(); } } @@ -250,12 +252,9 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) } if (StringUtils.isNotBlank(creatorName)) { - xmlw.writeStartElement("creator"); // JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); - writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); - xmlw.writeEndElement(); // } else { @@ -693,21 +692,21 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X kindOfDataValues = dsf.getControlledVocabularyValues(); break; } - - if (kindOfDataValues.isEmpty()) { - // Write an attribute only element if there are no kindOfData values. - xmlw.writeStartElement("resourceType"); - xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); - xmlw.writeEndElement(); - } else { - for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { - String resourceType = kindOfDataValue.getStrValue(); - if (StringUtils.isNotBlank(resourceType)) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); - } + } + if (kindOfDataValues.isEmpty()) { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } else { + for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { + String resourceType = kindOfDataValue.getStrValue(); + if (StringUtils.isNotBlank(resourceType)) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); } } } + } } From a2acdebbac758317b5d2d07fc1af01859f8bfa85 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 16:54:02 -0400 Subject: [PATCH 097/622] add XML Validation to test --- .../iq/dataverse/util/xml/XmlValidator.java | 5 +++ .../doi/datacite/XmlMetadataTemplateTest.java | 45 +++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java index 586ca50b6fd..cec64ab95b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java @@ -24,7 +24,12 @@ public class XmlValidator { private static final Logger logger = Logger.getLogger(XmlValidator.class.getCanonicalName()); public static boolean validateXmlSchema(String fileToValidate, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { + Source xmlFile = new StreamSource(new File(fileToValidate)); + return validateXmlSchema(xmlFile, schemaToValidateAgainst); + } + + public static boolean validateXmlSchema(Source xmlFile, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); 
Schema schema = schemaFactory.newSchema(schemaToValidateAgainst); Validator validator = schema.newValidator(); diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index e576398a474..c1bbc3bebc1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.pidproviders.doi.datacite; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetAuthor; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.DataverseServiceBean; @@ -19,12 +21,17 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import edu.harvard.iq.dataverse.util.xml.XmlValidator; import java.io.IOException; +import java.io.StringReader; +import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; +import javax.xml.transform.stream.StreamSource; + import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; @@ -32,6 +39,7 @@ import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; @@ -69,10 +77,32 @@ public static void setupMocks() { public void testDataCiteXMLCreation() throws IOException { DoiMetadata doiMetadata = new DoiMetadata(); doiMetadata.setTitle("A Title"); - List creators = new ArrayList(); - creators.add("Alice"); - creators.add("Bob"); - doiMetadata.setCreators(creators); + DatasetFieldType dft = new DatasetFieldType(DatasetFieldConstant.authorName, FieldType.TEXT, false); + dft.setDisplayFormat("#VALUE"); + DatasetFieldType dft2 = new DatasetFieldType(DatasetFieldConstant.authorAffiliation, FieldType.TEXT, false); + dft2.setDisplayFormat("#VALUE"); + DatasetAuthor alice = new DatasetAuthor(); + DatasetField df1 = new DatasetField(); + df1.setDatasetFieldType(dft); + df1.setSingleValue("Alice"); + alice.setName(df1); + DatasetField df2 = new DatasetField(); + df2.setDatasetFieldType(dft2); + df2.setSingleValue("Harvard University"); + alice.setAffiliation(df2); + DatasetAuthor bob = new DatasetAuthor(); + DatasetField df3 = new DatasetField(); + df3.setDatasetFieldType(dft); + df3.setSingleValue("Bob"); + bob.setName(df3); + DatasetField df4 = new DatasetField(); + df4.setDatasetFieldType(dft2); + df4.setSingleValue("QDR"); + bob.setAffiliation(df4); + List authors = new ArrayList<>(); + authors.add(alice); + authors.add(bob); + doiMetadata.setAuthors(authors); doiMetadata.setPublisher("Dataverse"); XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); @@ -102,6 +132,13 @@ public void testDataCiteXMLCreation() throws IOException { String xml = template.generateXML(d); System.out.println("Output is " + xml); + try { + StreamSource source = new StreamSource(new StringReader(xml)); + 
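// Note: fetching the XSD from schema.datacite.org at test time makes this test depend
// on network access; bundling a copy of the schema on the classpath would keep it hermetic.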
source.setSystemId("DataCite XML for test dataset"); + assertTrue(XmlValidator.validateXmlSchema(source, new URL("https://schema.datacite.org/meta/kernel-4/metadata.xsd"))); + } catch (SAXException e) { + System.out.println("Invalid schema: " + e.getMessage()); + } } From 3ec7a0b680ec5f04d650e830bb391c6be1f176f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 16:24:09 -0400 Subject: [PATCH 098/622] fix contributorType --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 74da57094c4..6e4d81d6248 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -535,7 +535,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin xmlw.writeStartElement(elementName); Map attributeMap = new HashMap(); if (StringUtils.isNotBlank(type)) { - attributeMap.put("contributorType", type); + xmlw.writeAttribute("contributorType", type); } // person name=, if (entityObject.getBoolean("isPerson")) { From 842dee678530391264b2869ec71ab70258901189 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 17:21:57 -0400 Subject: [PATCH 099/622] add geolocations element and multiple geolocation --- .../pidproviders/doi/XmlMetadataTemplate.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 6e4d81d6248..d0986616bb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1174,15 +1174,18 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X List places = dv.getGeographicCoverage(); if (places != null && !places.isEmpty()) { // geoLocationPlace - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); - + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); for (String[] place : places) { + xmlw.startElement("geoLocation"); // + ArrayList placeList = new ArrayList(); for (String placePart : place) { placeList.add(placePart); } XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + xmlw.endElement(); // } + } boolean boundingBoxFound = false; boolean productionPlaceFound = false; @@ -1213,7 +1216,8 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } } if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + xmlw.startElement("geoLocation"); // if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { // A point xmlw.writeStartElement("geoLocationPoint"); @@ -1230,15 +1234,18 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); } + xmlw.endElement(); // } } case DatasetFieldConstant.productionPlace: 
productionPlaceFound = true; // geoLocationPlace - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); List prodPlaces = dsf.getValues(); for (String prodPlace : prodPlaces) { + xmlw.startElement("geoLocation"); // XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + xmlw.endElement(); // } break; } From 81a7c4a946ee4e54ae91913c6de6857fb6a553ba Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 17:29:19 -0400 Subject: [PATCH 100/622] typos --- .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index d0986616bb4..127a1930860 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1176,14 +1176,14 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X // geoLocationPlace geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); for (String[] place : places) { - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // ArrayList placeList = new ArrayList(); for (String placePart : place) { placeList.add(placePart); } XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); - xmlw.endElement(); // + xmlw.writeEndElement(); // } } @@ -1217,7 +1217,7 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { // A point xmlw.writeStartElement("geoLocationPoint"); @@ -1234,7 +1234,7 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); } - xmlw.endElement(); // + xmlw.writeEndElement(); // } } case DatasetFieldConstant.productionPlace: @@ -1243,9 +1243,9 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); List prodPlaces = dsf.getValues(); for (String prodPlace : prodPlaces) { - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); - xmlw.endElement(); // + xmlw.writeEndElement(); // } break; } From ed5eab0deb487ebfbb53157a40e2cf409d5f40ab Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 09:53:12 -0400 Subject: [PATCH 101/622] try execute inside the main method trying to avoid a separate tx boundary --- .../command/impl/CuratePublishedDatasetVersionCommand.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index f83041d87bd..fbff40a9c80 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -167,9 +167,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { DeleteDatasetVersionCommand cmd; cmd = new DeleteDatasetVersionCommand(getRequest(), savedDataset); - ctxt.engine().submit(cmd); - // Running the command above reindexes the dataset, so we don't need to do it - // again in here. + cmd.execute(ctxt); // And update metadata at PID provider ctxt.engine().submit( From 39673f05e6d4394d3549c58a5d487a9c732113c2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 10:41:42 -0400 Subject: [PATCH 102/622] Fix subject, keyword --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 127a1930860..85e28670cfc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -305,8 +305,10 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt // fields if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersionForCopy(); - dv.getDatasetSubjects(); for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.subject)) { + subjects.addAll(dsf.getValues()); + } if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { compoundKeywords = dsf.getDatasetFieldCompoundValues(); } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { @@ -330,7 +332,7 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { - case DatasetFieldConstant.keyword: + case DatasetFieldConstant.keywordValue: keyword = subField.getValue(); break; case DatasetFieldConstant.keywordVocab: From 36097d61bbf0c92aab48db01ff02e1c23b86be1a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 10:41:51 -0400 Subject: [PATCH 103/622] fix geo coverage --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index d723cf3d528..6648419216d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1342,9 +1342,8 @@ public List getGeographicCoverage() { } geoCoverages.add(coverageItem); } - + break; } - break; } return geoCoverages; } From a5d3b3e5a40b049176a6c3a205b1199c2117694e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:47:04 -0400 Subject: [PATCH 104/622] adjust funders to include grant number, add xml escaping for description --- .../pidproviders/doi/XmlMetadataTemplate.java | 70 +++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java 
b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 85e28670cfc..3b6a5cb2906 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1082,7 +1082,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X if (descriptions != null) { for (String description : descriptions) { descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, StringEscapeUtils.escapeXml10(description)); } } @@ -1272,21 +1272,67 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr dv = df.getOwner().getLatestVersionForCopy(); } if (dv != null) { - List funders = dv.getFunders(); - if (!funders.isEmpty()) { - - for (String funder : funders) { - if (!StringUtils.isBlank(funder)) { - fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); - xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); - xmlw.writeEndElement(); // + List retList = new ArrayList<>(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributor)) { + boolean addFunder = false; + for (DatasetFieldCompoundValue contributorValue : dsf.getDatasetFieldCompoundValues()) { + String contributorName = null; + String contributorType = null; + for (DatasetField subField : contributorValue.getChildDatasetFields()) { + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorName)) { + contributorName = subField.getDisplayValue(); + } + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorType)) { + contributorType = subField.getRawValue(); + } + } + // SEK 02/12/2019 move outside loop to prevent contrib type to carry over to + // next contributor + // TODO: Consider how this will work in French, Chinese, etc. + if ("Funder".equals(contributorType)) { + if (!StringUtils.isBlank(contributorName)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", contributorName); + xmlw.writeEndElement(); // + } + } } } - if (fundingReferenceWritten) { - xmlw.writeEndElement(); // + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumber)) { + for (DatasetFieldCompoundValue grantObject : dsf.getDatasetFieldCompoundValues()) { + String funder = null; + String awardNumber = null; + for (DatasetField subField : grantObject.getChildDatasetFields()) { + // It would be nice to do something with grantNumberValue (the actual number) + // but schema.org doesn't support it. 
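// (The caveat above looks carried over from the schema.org exporter this block was
// adapted from; DataCite's fundingReference does accept an awardNumber, and one is
// in fact written below when a grant number value is present.)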
+ if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberAgency)) { + String grantAgency = subField.getDisplayValue(); + funder = grantAgency; + } else if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberValue)) { + String grantNumberValue = subField.getDisplayValue(); + awardNumber = grantNumberValue; + } + } + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + if (StringUtils.isNotBlank(awardNumber)) { + writeFullElement(xmlw, null, "awardNumber", null, awardNumber); + } + xmlw.writeEndElement(); // + } + + } } } + + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + } } } \ No newline at end of file From 8a12444d3b835a1df989bc674337897b7feaf1d2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:49:03 -0400 Subject: [PATCH 105/622] bug: add dataset descriptions --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 3b6a5cb2906..564768991cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1069,7 +1069,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X if (dvObject instanceof Dataset d) { dv = d.getLatestVersionForCopy(); - dv.getDescriptions(); + descriptions = dv.getDescriptions(); } else if (dvObject instanceof DataFile df) { String description = df.getDescription(); if (description != null) { From f3e5dc1d00e1d68a734b4c593ae5b874bb5d14a2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:54:50 -0400 Subject: [PATCH 106/622] typo, add xml escape for funder --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 564768991cb..a4fd4585028 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1294,7 +1294,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr if (!StringUtils.isBlank(contributorName)) { fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", contributorName); + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(contributorName)); xmlw.writeEndElement(); // } } @@ -1318,9 +1318,9 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr if (!StringUtils.isBlank(funder)) { fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + XmlWriterUtil.writeFullElement(xmlw, 
"funderName", StringEscapeUtils.escapeXml10(funder)); if (StringUtils.isNotBlank(awardNumber)) { - writeFullElement(xmlw, null, "awardNumber", null, awardNumber); + writeFullElement(xmlw, null, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); } xmlw.writeEndElement(); // } From 5610c950212f2d3d80d7144c37c98e6cd0b71c5e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 12:00:09 -0400 Subject: [PATCH 107/622] still typo --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a4fd4585028..e2883cad1f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1320,7 +1320,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr xmlw.writeStartElement("fundingReference"); // XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (StringUtils.isNotBlank(awardNumber)) { - writeFullElement(xmlw, null, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); + XmlWriterUtil.writeFullElement(xmlw, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); } xmlw.writeEndElement(); // } From 7148b03360b00363f6550aed5d5d851ab7c2c356 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:14:48 -0400 Subject: [PATCH 108/622] mark contact as deprecated - unused --- .../java/edu/harvard/iq/dataverse/DatasetFieldConstant.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 22bad42df96..c3e385dcff2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -156,6 +156,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String confidentialityDeclaration="confidentialityDeclaration"; public final static String specialPermissions="specialPermissions"; public final static String restrictions="restrictions"; + @Deprecated + //Doesn't appear to be used and is not datasetContact public final static String contact="contact"; public final static String citationRequirements="citationRequirements"; public final static String depositorRequirements="depositorRequirements"; From 0470459316b3338bd940ea2f9afcf9ec1430eab1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:14:59 -0400 Subject: [PATCH 109/622] more fixes --- .../pidproviders/doi/XmlMetadataTemplate.java | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e2883cad1f9..fd5a4ecf7fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -427,7 +427,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X case DatasetFieldConstant.distributor: compoundDistributors = dsf.getDatasetFieldCompoundValues(); break; - case DatasetFieldConstant.contact: + case 
DatasetFieldConstant.datasetContact: compoundContacts = dsf.getDatasetFieldCompoundValues(); break; case DatasetFieldConstant.contributor: @@ -638,7 +638,7 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } if (releaseDate != null) { - String date = Util.getDateTimeFormat().format(releaseDate); + String date = Util.getDateFormat().format(releaseDate); datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Available"); @@ -660,6 +660,14 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + if(StringUtils.isNotBlank(startDate)) { + Date start = Util.getDateTimeFormat().parse(startDate); + startDate = Util.getDateFormat().format(start); + } + if(StringUtils.isNotBlank(endDate)) { + Date end = Util.getDateTimeFormat().parse(endDate); + endDate = Util.getDateFormat().format(end); + } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); @@ -675,14 +683,14 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // Currently not supported. Spec indicates one 'primary' language. Could send // the first entry in DatasetFieldConstant.language or send iff there is only - // one entry, and/or default to the machine's default lang? + // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? return; } // 10, ResourceType (with mandatory general type // description sub- property) (M) private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - List kindOfDataValues = new ArrayList(); + List kindOfDataValues = new ArrayList(); Map attributes = new HashMap(); attributes.put("resourceTypeGeneral", "Dataset"); @@ -691,7 +699,7 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X for (DatasetField dsf : dv.getDatasetFields()) { switch (dsf.getDatasetFieldType().getName()) { case DatasetFieldConstant.kindOfData: - kindOfDataValues = dsf.getControlledVocabularyValues(); + kindOfDataValues.addAll(dsf.getValues()); break; } } @@ -701,8 +709,7 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); xmlw.writeEndElement(); } else { - for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { - String resourceType = kindOfDataValue.getStrValue(); + for (String resourceType : kindOfDataValues) { if (StringUtils.isNotBlank(resourceType)) { XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); } @@ -821,14 +828,16 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th * way those two fields are used for all identifier types. The code here is * ~best effort to interpret those fields. 
*/ + logger.info("Found relpub: " + pubIdType + " " + identifier + " " + url); pubIdType = getCanonicalPublicationType(pubIdType); - +logger.info("Canonical type: " + pubIdType); // Prefer url if set, otherwise check identifier String relatedIdentifier = url; if (StringUtils.isBlank(relatedIdentifier)) { relatedIdentifier = identifier; } + logger.info("Related identifier: " + relatedIdentifier); // For types where we understand the protocol, get the canonical form if (pubIdType != null) { switch (pubIdType) { @@ -836,12 +845,15 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { relatedIdentifier = "doi:" + relatedIdentifier; } + logger.info("Intermediate Related identifier: " + relatedIdentifier); try { GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); relatedIdentifier = pid.asRawIdentifier(); } catch (IllegalArgumentException e) { + logger.warning("Invalid DOI: " + e.getLocalizedMessage()); relatedIdentifier = null; } + logger.info("Final Related identifier: " + relatedIdentifier); break; case "Handle": if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { From c0265da5324c6f68e9356a44261ce6b166ded6b8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:22:29 -0400 Subject: [PATCH 110/622] catch parseexception --- .../pidproviders/doi/XmlMetadataTemplate.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index fd5a4ecf7fb..9ed417e77ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -5,6 +5,7 @@ import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; +import java.text.ParseException; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -661,12 +662,20 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { if(StringUtils.isNotBlank(startDate)) { + try { Date start = Util.getDateTimeFormat().parse(startDate); startDate = Util.getDateFormat().format(start); + } catch (ParseException e) { + logger.warning("Could not parse date: " + startDate); + } } if(StringUtils.isNotBlank(endDate)) { + try { Date end = Util.getDateTimeFormat().parse(endDate); - endDate = Util.getDateFormat().format(end); + endDate = Util.getDateFormat().format(end); + } catch (ParseException e) { + logger.warning("Could not parse date: " + endDate); + }; } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); From 2ff867850500aa9b2eb5712348b65cf48ed4b917 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:52:42 -0400 Subject: [PATCH 111/622] fix alternateIdentifier, related PID parsing, series --- .../pidproviders/doi/XmlMetadataTemplate.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 9ed417e77ce..7f861b3e42d 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -755,7 +755,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } if (altPids != null && !altPids.isEmpty()) { - alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); for (AlternativePersistentIdentifier altPid : altPids) { String identifierType = null; String identifier = null; @@ -775,7 +775,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) identifier = altPid.getAuthority() + altPid.getIdentifier(); break; } - attributes.put("alternativeIdentifierType", identifierType); + attributes.put("alternateIdentifierType", identifierType); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); } @@ -795,9 +795,9 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) break; } } - attributes.put("alternativeIdentifierType", identifierType); + attributes.put("alternateIdentifierType", identifierType); if (!StringUtils.isBlank(identifier)) { - alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); } @@ -851,7 +851,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { switch (pubIdType) { case "DOI": - if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + if (!relatedIdentifier.startsWith("doi:") || !relatedIdentifier.startsWith("http")) { relatedIdentifier = "doi:" + relatedIdentifier; } logger.info("Intermediate Related identifier: " + relatedIdentifier); @@ -865,7 +865,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th logger.info("Final Related identifier: " + relatedIdentifier); break; case "Handle": - if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + if (!relatedIdentifier.startsWith("hdl:") || !relatedIdentifier.startsWith("http")) { relatedIdentifier = "hdl:" + relatedIdentifier; } try { @@ -1158,7 +1158,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X List childDsfs = dsfcv.getChildDatasetFields(); for (DatasetField childDsf : childDsfs) { - if (DatasetFieldConstant.seriesInformation.equals(childDsf.getDatasetFieldType().getName())) { + if (DatasetFieldConstant.seriesName.equals(childDsf.getDatasetFieldType().getName())) { String seriesInformation = childDsf.getValue(); if (StringUtils.isNotBlank(seriesInformation)) { descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); From 182f3d7bca310c54eb44f0452c76671b26b03824 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:53:47 -0400 Subject: [PATCH 112/622] catch PID update exception to avoid corrupt dataset --- .../CuratePublishedDatasetVersionCommand.java | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index fbff40a9c80..dd8b19e0c3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -151,7 +152,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { tempDataset.setThumbnailFile(publishedFmd.getDataFile()); } } - + if(logger.isLoggable(Level.FINE)) { + for(FileMetadata fmd: updateVersion.getFileMetadatas()) { + logger.fine("Id: " + fmd.getId() + " label: " + fmd.getLabel()); + } + } // Update modification time on the published version and the dataset updateVersion.setLastUpdateTime(getTimestamp()); tempDataset.setModificationTime(getTimestamp()); @@ -170,28 +175,38 @@ public Dataset execute(CommandContext ctxt) throws CommandException { cmd.execute(ctxt); // And update metadata at PID provider - ctxt.engine().submit( - new UpdateDvObjectPIDMetadataCommand(savedDataset, getRequest())); - - //And the exported metadata files try { - ExportService instance = ExportService.getInstance(); - instance.exportAllFormats(getDataset()); - } catch (ExportException ex) { - // Just like with indexing, a failure to export is not a fatal condition. - logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while exporting metadata files:{0}", ex.getMessage()); + ctxt.engine().submit( + new UpdateDvObjectPIDMetadataCommand(savedDataset, getRequest())); + } catch (CommandException ex) { + //Make this non-fatal as after the DeleteDatasetVersionCommand, we can't roll back - for some reason no datasetfields remain in the DB + //(The old version doesn't need them and the new version doesn't get updated to include them?) + logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while updating PID metadata:{0}", ex.getMessage()); } - - // Update so that getDataset() in updateDatasetUser will get the up-to-date copy // (with no draft version) setDataset(savedDataset); updateDatasetUser(ctxt); - - - return savedDataset; } + @Override + public boolean onSuccess(CommandContext ctxt, Object r) { + boolean retVal = true; + Dataset d = (Dataset) r; + + ctxt.index().asyncIndexDataset(d, true); + + // And the exported metadata files + try { + ExportService instance = ExportService.getInstance(); + instance.exportAllFormats(d); + } catch (ExportException ex) { + // Just like with indexing, a failure to export is not a fatal condition. 
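// We do, however, return false from onSuccess below so the caller can tell the export step failed.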
+ retVal = false; + logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while exporting metadata files:{0}", ex.getMessage()); + } + return retVal; + } } From be903555bcc3c21169b3a7343a076c331d308bf6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 16:24:40 -0400 Subject: [PATCH 113/622] try long sleep --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dab0ff43fcf..29de42f3578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -944,9 +944,9 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont // portion of the PublishDatasetCommand. I'm going to leave the 1 second // sleep below, for just in case reasons: -- L.A. try { - Thread.sleep(1000); + Thread.sleep(5000); } catch (Exception ex) { - logger.warning("Failed to sleep for a second."); + logger.warning("Failed to sleep for five seconds."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); From e458e8ca5c14f48d8da94d83c2cb40e76e9198d6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 16:53:28 -0400 Subject: [PATCH 114/622] set dv released before pid publicize, go back to short time --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- .../command/impl/FinalizeDatasetPublicationCommand.java | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 29de42f3578..18bd6dc74ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -944,9 +944,9 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont // portion of the PublishDatasetCommand. I'm going to leave the 1 second // sleep below, for just in case reasons: -- L.A. try { - Thread.sleep(5000); + Thread.sleep(1000); } catch (Exception ex) { - logger.warning("Failed to sleep for five seconds."); + logger.warning("Failed to sleep for one second."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 287e877f6e0..299bb3168de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -211,7 +211,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getLatestVersion().getVersionState() != RELEASED) { // some imported datasets may already be released. - + theDataset.getLatestVersion().setVersionState(RELEASED); if (!datasetExternallyReleased) { publicizeExternalIdentifier(theDataset, ctxt); // Will throw a CommandException, unless successful. 
@@ -220,7 +220,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // a failure - it will remove any locks, and it will send a // proper notification to the user(s). } - theDataset.getLatestVersion().setVersionState(RELEASED); } final Dataset ds = ctxt.em().merge(theDataset); From 27fe7b4d0dad6124d5e036c8ec5d36b31a371a9b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 17:03:13 -0400 Subject: [PATCH 115/622] always use latest version for copy --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 7f861b3e42d..0adc9984b3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -146,7 +146,7 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag List altTitles = new ArrayList<>(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { - DatasetVersion dv = d.getLatestVersion(); + DatasetVersion dv = d.getLatestVersionForCopy(); Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); if (subTitleField.isPresent()) { subTitle = subTitleField.get().getValue(); From 00a383007686b49b938f88415b476e377d054f98 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 09:53:40 -0400 Subject: [PATCH 116/622] handle deaccession, fix relatedIDtype for files --- .../edu/harvard/iq/dataverse/DataFile.java | 19 +++ .../pidproviders/doi/XmlMetadataTemplate.java | 117 +++++++++++------- 2 files changed, 90 insertions(+), 46 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 29a4a14c021..1a610d9ea6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1123,4 +1123,23 @@ private boolean tagExists(String tagLabel) { } return false; } + + public boolean isDeaccessioned() { + // return true, if all published versions were deaccessioned + boolean inDeaccessionedVersions = false; + for (FileMetadata fmd : getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + return false; + } + // Also check for draft version + if (testDsv.isDraft()) { + return false; + } + if (testDsv.isDeaccessioned()) { + inDeaccessionedVersions = true; + } + } + return inDeaccessionedVersions; // since any published version would have already returned + } } // end of class diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 0adc9984b3d..96ee84fe13b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -100,30 +100,41 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM String metadataLanguage = null; // when set, otherwise = language? 
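// Note: a deaccessioned dataset or file (detected just below) is reduced to the
// required DataCite elements, with ":unav" placeholders (and a "9999" publication
// year) standing in for the real values.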
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); - + boolean deaccessioned=false; + if(dvObject instanceof Dataset d) { + deaccessioned=d.isDeaccessioned(); + } else if (dvObject instanceof DataFile df) { + deaccessioned = df.isDeaccessioned(); + } xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); writeIdentifier(xmlw, dvObject); - writeCreators(xmlw, doiMetadata.getAuthors()); - writeTitles(xmlw, dvObject, language); - writePublisher(xmlw, dvObject); - writePublicationYear(xmlw, dvObject); - writeSubjects(xmlw, dvObject); - writeContributors(xmlw, dvObject); - writeDates(xmlw, dvObject); - writeLanguage(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors(), deaccessioned); + writeTitles(xmlw, dvObject, language, deaccessioned); + writePublisher(xmlw, dvObject, deaccessioned); + writePublicationYear(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + } writeResourceType(xmlw, dvObject); - writeAlternateIdentifiers(xmlw, dvObject); - writeRelatedIdentifiers(xmlw, dvObject); - writeSize(xmlw, dvObject); - writeFormats(xmlw, dvObject); - writeVersion(xmlw, dvObject); - writeAccessRights(xmlw, dvObject); - writeDescriptions(xmlw, dvObject); - writeGeoLocations(xmlw, dvObject); - writeFundingReferences(xmlw, dvObject); + if (!deaccessioned) { + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + } + writeDescriptions(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); + } xmlw.writeEndElement(); xmlw.flush(); } @@ -140,23 +151,29 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM * @return * @throws XMLStreamException */ - private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { - String title = doiMetadata.getTitle(); + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language, boolean deaccessioned) throws XMLStreamException { + String title = null; String subTitle = null; List altTitles = new ArrayList<>(); - // Only Datasets can have a subtitle or alternative titles - if (dvObject instanceof Dataset d) { - DatasetVersion dv = d.getLatestVersionForCopy(); - Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); - if (subTitleField.isPresent()) { - subTitle = subTitleField.get().getValue(); - } - Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); - if (altTitleField.isPresent()) { - altTitles = altTitleField.get().getValues(); + + if (!deaccessioned) { + doiMetadata.getTitle(); + + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) 
{ + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } } + } else { + title = AbstractDOIProvider.UNAVAILABLE; } - if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { xmlw.writeStartElement("titles"); if (StringUtils.isNotBlank(title)) { @@ -227,13 +244,13 @@ private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XML * - the list of authors * @throws XMLStreamException */ - public void writeCreators(XMLStreamWriter xmlw, List authorList) throws XMLStreamException { + public void writeCreators(XMLStreamWriter xmlw, List authorList, boolean deaccessioned) throws XMLStreamException { // creators -> creator -> creatorName with nameType attribute, givenName, // familyName, nameIdentifier // write all creators xmlw.writeStartElement("creators"); // - if (authorList != null && !authorList.isEmpty()) { + if (!deaccessioned && authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); String affiliation = null; @@ -267,18 +284,21 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) xmlw.writeEndElement(); // } - private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // publisher should already be non null - :unav if it wasn't available + if(deaccessioned) { + doiMetadata.setPublisher(AbstractPidProvider.UNAVAILABLE); + } XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); } - private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (doiMetadata.getPublisherYear() != null) { + if (! deaccessioned && (doiMetadata.getPublisherYear() != null)) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. 
pubYear = doiMetadata.getPublisherYear(); @@ -926,6 +946,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th attributes.clear(); attributes.put("relationType", "IsPartOf"); + attributes.put("relatedIdentifierType", pubIdType); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); } @@ -1082,20 +1103,24 @@ private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); // } - private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // descriptions -> description with descriptionType attribute boolean descriptionsWritten = false; List descriptions = null; DatasetVersion dv = null; - - if (dvObject instanceof Dataset d) { - dv = d.getLatestVersionForCopy(); - descriptions = dv.getDescriptions(); - } else if (dvObject instanceof DataFile df) { - String description = df.getDescription(); - if (description != null) { - descriptions = new ArrayList(); - descriptions.add(description); + if(deaccessioned) { + descriptions = new ArrayList(); + descriptions.add(AbstractDOIProvider.UNAVAILABLE); + } else { + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + descriptions = dv.getDescriptions(); + } else if (dvObject instanceof DataFile df) { + String description = df.getDescription(); + if (description != null) { + descriptions = new ArrayList(); + descriptions.add(description); + } } } Map attributes = new HashMap(); From 1faf0cd84c5ef52a8e88afb7989e238058870916 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 10:26:30 -0400 Subject: [PATCH 117/622] missed assignment for title --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 96ee84fe13b..b2008e14a89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -157,7 +157,7 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag List altTitles = new ArrayList<>(); if (!deaccessioned) { - doiMetadata.getTitle(); + title = doiMetadata.getTitle(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { From 23dd581c98b921908b2cdcca13d32e8731c76e7e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 10:54:06 -0400 Subject: [PATCH 118/622] fix creator for deaccessioned --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index b2008e14a89..8f962204302 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -249,8 +249,10 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // 
familyName, nameIdentifier // write all creators xmlw.writeStartElement("creators"); // - - if (!deaccessioned && authorList != null && !authorList.isEmpty()) { + if(deaccessioned) { + authorList = null; + } + if (authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); String affiliation = null; From 3bbd2e9dfc4cd3d1e5cebd5a9cf7dbbfe52c1fa4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 11:51:50 -0400 Subject: [PATCH 119/622] correct fix for creators when deaccessioned --- .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8f962204302..a3eca9ef9a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -250,8 +250,10 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // write all creators xmlw.writeStartElement("creators"); // if(deaccessioned) { + //skip the loop below authorList = null; } + boolean nothingWritten = true; if (authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); @@ -274,15 +276,17 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, if (StringUtils.isNotBlank(creatorName)) { JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + nothingWritten = false; writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); } - else { - // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); - } + } } + if (nothingWritten) { + // Authors unavailable + XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + } xmlw.writeEndElement(); // } From 4def6da32c207223fb2cf9d5aada50f921ddd474 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:00:15 -0400 Subject: [PATCH 120/622] remove bad value and lang --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a3eca9ef9a0..2d09c67fea9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -285,7 +285,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, } if (nothingWritten) { // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + XmlWriterUtil.writeFullElement(xmlw, "creator", AbstractPidProvider.UNAVAILABLE); } xmlw.writeEndElement(); // } From eac477ec3c2fef4f48759b110f6157081b301eff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:46:07 -0400 Subject: [PATCH 121/622] add creatorName sub element for deaccession/no names case --- 
.../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 2d09c67fea9..a660a80448a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -285,7 +285,9 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, } if (nothingWritten) { // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", AbstractPidProvider.UNAVAILABLE); + xmlw.writeStartElement("creator"); + XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); + xmlw.writeEndElement("creator"); } xmlw.writeEndElement(); // } From 154ac8a91554be29492d62454f1b0e52501b5af2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:49:59 -0400 Subject: [PATCH 122/622] typo --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a660a80448a..732a633116e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -287,7 +287,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // Authors unavailable xmlw.writeStartElement("creator"); XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); - xmlw.writeEndElement("creator"); + xmlw.writeEndElement(); } xmlw.writeEndElement(); // } From 9144f6c96ae0685a7ac719d2203f0aed3f71e85e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 13:16:32 -0400 Subject: [PATCH 123/622] fix resourceType - always 1 entry --- .../pidproviders/doi/XmlMetadataTemplate.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 732a633116e..8f6211c0730 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -736,23 +736,24 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X for (DatasetField dsf : dv.getDatasetFields()) { switch (dsf.getDatasetFieldType().getName()) { case DatasetFieldConstant.kindOfData: - kindOfDataValues.addAll(dsf.getValues()); + List vals = dsf.getValues(); + for(String val: vals) { + if(StringUtils.isNotBlank(val)) { + kindOfDataValues.add(val); + } + } break; } } - if (kindOfDataValues.isEmpty()) { + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); + + } else { // Write an attribute only element if there are no kindOfData values. 
xmlw.writeStartElement("resourceType"); xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); xmlw.writeEndElement(); - } else { - for (String resourceType : kindOfDataValues) { - if (StringUtils.isNotBlank(resourceType)) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); - } - } } - } } From a5870fbaf3a89e9c6100b8e1f4371caa291c1e23 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 13:26:41 -0400 Subject: [PATCH 124/622] Also handle file case for resourceType --- .../pidproviders/doi/XmlMetadataTemplate.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8f6211c0730..7d817d57a2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -745,16 +745,17 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X break; } } - if (!kindOfDataValues.isEmpty()) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); - - } else { - // Write an attribute only element if there are no kindOfData values. - xmlw.writeStartElement("resourceType"); - xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); - xmlw.writeEndElement(); - } } + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); + + } else { + // Write an attribute only element if there are no kindOfData values. 
+ xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } + } /** From 24db2af2bfe9564eedc1ee9aedae8b9048bd551e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 31 May 2024 14:47:06 -0400 Subject: [PATCH 125/622] missed changes --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 7d817d57a2a..f5bd009e8d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -23,6 +23,7 @@ import javax.xml.stream.XMLStreamWriter; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -39,6 +40,7 @@ import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetFieldValue; +import edu.harvard.iq.dataverse.DatasetRelPublication; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; @@ -703,7 +705,7 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea endDate = Util.getDateFormat().format(end); } catch (ParseException e) { logger.warning("Could not parse date: " + endDate); - }; + }; } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); @@ -853,7 +855,6 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th Map attributes = new HashMap(); if (dvObject instanceof Dataset dataset) { - List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); if (!relatedPublications.isEmpty()) { for (DatasetRelPublication relatedPub : relatedPublications) { @@ -967,7 +968,6 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th } } - static HashMap relatedIdentifierTypeMap = new HashMap(); private static String getCanonicalPublicationType(String pubIdType) { From f0fd61ad555369ef1af9a1529797cf8d73d6efde Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 31 May 2024 15:05:02 -0400 Subject: [PATCH 126/622] simplify - util checks for null and empty --- .../dataverse/export/ddi/DdiExportUtil.java | 46 +++++-------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index c0e3057696a..f5efc448090 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -371,7 +371,7 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - XmlWriterUtil.writeAttribute(xmlw,"source","archive"); + xmlw.writeAttribute("source","archive"); xmlw.writeStartElement("version"); XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 
10)); XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); @@ -842,9 +842,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); - if(!authorAffiliation.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } @@ -865,9 +863,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); - if(!contributorType.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); - } + XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId } @@ -907,12 +903,8 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO // TODO: Since datasetContactEmail is a required field but datasetContactName is not consider not checking if datasetContactName is empty so we can write out datasetContactEmail. if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); - if(!datasetContactAffiliation.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); - } - if(!datasetContactEmail.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } @@ -957,9 +949,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT xmlw.writeStartElement("producer"); XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); - /*if (!producerLogo.isEmpty()) { - XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); - }*/ + //XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } @@ -1139,9 +1129,7 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); - if(!descriptionDate.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); - } + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); if(DvObjectContainer.isMetadataLanguageSet(lang)) { xmlw.writeAttribute("xml:lang", lang); } @@ -1176,9 +1164,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); - if(!grantAgency.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } @@ -1210,9 +1196,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); - if(!otherIdAgency.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } @@ -1244,9 +1228,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO } if 
(!softwareName.isEmpty()){ xmlw.writeStartElement("software"); - if(!softwareVersion.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); - } + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } @@ -1359,12 +1341,8 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); - if(!notesType.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"type",notesType); - } - if(!notesSubject.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); - } + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } From d40b2e5e37c2d0a48a4a6dc846cd50a4b5ac3cd4 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 3 Jun 2024 09:49:12 -0400 Subject: [PATCH 127/622] review comment mods --- doc/release-notes/10169-JSON-schema-validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md index ae47f6a1401..ad89804f504 100644 --- a/doc/release-notes/10169-JSON-schema-validation.md +++ b/doc/release-notes/10169-JSON-schema-validation.md @@ -1,4 +1,4 @@ ### JSON Schema for datasets -Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. +Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. 
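A note on the JSON schema validation described in the release note above: the typeClass rules are easier to see with a concrete field. Below is a hedged sketch using jakarta.json (already imported elsewhere in this series) that builds a compound, multiple=true field of the shape the validator accepts; the typeName values mirror the sample dataset JSON appearing later in these patches, while the class itself is illustrative only, not Dataverse code.

import jakarta.json.Json;
import jakarta.json.JsonObject;

// Illustrative builder for one dataset field of the validated shape.
public class DatasetFieldShapeExample {

    // multiple=true + typeClass=compound: "value" must be a list of maps of
    // child fields; dsDescriptionValue is required, dsDescriptionDate optional.
    public static JsonObject dsDescription(String text) {
        return Json.createObjectBuilder()
                .add("typeName", "dsDescription")
                .add("multiple", true)
                .add("typeClass", "compound")
                .add("value", Json.createArrayBuilder()
                        .add(Json.createObjectBuilder()
                                .add("dsDescriptionValue", Json.createObjectBuilder()
                                        .add("typeName", "dsDescriptionValue")
                                        .add("multiple", false)
                                        // primitive + multiple=false: value is a single string
                                        .add("typeClass", "primitive")
                                        .add("value", text))))
                .build();
    }

    public static void main(String[] args) {
        System.out.println(dsDescription("DescriptionText1"));
    }
}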
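One more note, on PATCH 126 above: the per-call if(!value.isEmpty()) guards can be dropped because the shared XmlWriterUtil.writeAttribute helper is expected to check for null and empty itself. A minimal sketch of a helper with that behavior, assuming only what the commit message states; the class and method below are stand-ins, not the actual XmlWriterUtil source.

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

// Hypothetical stand-in for the null/empty-safe attribute helper.
public final class NullSafeAttributeWriter {

    // Writes the attribute only when a usable value is present, so callers
    // no longer need their own emptiness checks around each call.
    public static void writeAttribute(XMLStreamWriter xmlw, String name, String value)
            throws XMLStreamException {
        if (value != null && !value.isEmpty()) {
            xmlw.writeAttribute(name, value);
        }
    }
}

With the check centralized, an attribute that must always appear, such as source="archive" in writeVersionStatement, is written with plain xmlw.writeAttribute instead, which is why that one call moves in the opposite direction in the same patch.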
From ead153f502ef77258d20f6faf4a0fc8282a74687 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Jun 2024 12:18:45 -0400 Subject: [PATCH 128/622] typo in DOI parsing logic --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index f5bd009e8d7..e6c1a1ae6b4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -882,7 +882,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { switch (pubIdType) { case "DOI": - if (!relatedIdentifier.startsWith("doi:") || !relatedIdentifier.startsWith("http")) { + if (!(relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http"))) { relatedIdentifier = "doi:" + relatedIdentifier; } logger.info("Intermediate Related identifier: " + relatedIdentifier); From 76d6ee45ffc1cf2b76a1cda6c9bbfc31fae819af Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 10 Jun 2024 17:07:26 -0400 Subject: [PATCH 129/622] doc and release note tweaks #10169 --- .../10169-JSON-schema-validation.md | 5 ++-- doc/sphinx-guides/source/api/native-api.rst | 28 ++++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md index ad89804f504..92ff4a917d5 100644 --- a/doc/release-notes/10169-JSON-schema-validation.md +++ b/doc/release-notes/10169-JSON-schema-validation.md @@ -1,4 +1,3 @@ -### JSON Schema for datasets - -Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. +### Improved JSON Schema validation for datasets +Enhanced JSON schema validation with checks for required and allowed child objects, type checking for field types including `primitive`, `compound` and `controlledVocabulary`. More user-friendly error messages to help pinpoint the issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10543. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index a1c864fca4f..7bd1e9f9561 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -539,9 +539,7 @@ The fully expanded example above (without environment variables) looks like this Retrieve a Dataset JSON Schema for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This -first version of the schema only includes required elements and fields. 
In the future we plan to improve the schema by adding controlled -vocabulary and more robust dataset field format testing: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: .. code-block:: bash @@ -567,17 +565,21 @@ Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Validates a dataset JSON file customized for a given collection prior to creating the dataset. + The validation tests for: -Json formatting and the presence of required elements -typeClass must follow these rules: -- if multiple = true then value must be a list -- if typeClass = ''primitive'' the value object is a String or a List of Strings depending on the multiple flag -- if typeClass = ''compound'' the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag -- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values stored in the database -typeName validations (child objects with their required and allowed typeNames are configured automatically by the database schema). Examples include: -- dsDescription validation includes checks for typeName = ''dsDescriptionValue'' (required) and ''dsDescriptionDate'' (optional) -- datasetContact validation includes checks for typeName = ''datasetContactName'' (required) and ''datasetContactEmail''; ''datasetContactAffiliation'' (optional) -- etc. + +- JSON formatting +- required fields +- typeClass must follow these rules: + + - if multiple = true then value must be a list + - if typeClass = ``primitive`` the value object is a String or a List of Strings depending on the multiple flag + - if typeClass = ``compound`` the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag + - if typeClass = ``controlledVocabulary`` the values are checked against the list of allowed values stored in the database + - typeName validations (child objects with their required and allowed typeNames are configured automatically by the database schema). Examples include: + + - dsDescription validation includes checks for typeName = ``dsDescriptionValue`` (required) and ``dsDescriptionDate`` (optional) + - datasetContact validation includes checks for typeName = ``datasetContactName`` (required) and ``datasetContactEmail``; ``datasetContactAffiliation`` (optional) .. 
code-block:: bash From 0dc2ea9536d270640a2dfc6508d8d5bb64a776f0 Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Tue, 11 Jun 2024 15:49:21 +0200 Subject: [PATCH 130/622] Fixed wrong asset class import --- src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java index b672120c16d..1afd0a55b0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java @@ -3,7 +3,6 @@ import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; -import io.smallrye.common.constraint.Assert; import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; @@ -19,6 +18,7 @@ import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; +import static org.junit.jupiter.api.Assertions.assertTrue; public class SavedSearchIT { @@ -225,7 +225,7 @@ public void testSavedSearches() { getListReponse.prettyPrint(); JsonPath path3 = JsonPath.from(getListReponse.body().asString()); List listAfterDelete = path3.getList("data.savedSearches"); - Assert.assertTrue(listBeforeDelete.size() - 1 == listAfterDelete.size()); + assertTrue(listBeforeDelete.size() - 1 == listAfterDelete.size()); } public String createSavedSearchJson(String query, Integer creatorId, Integer definitionPointId, String... filterQueries) { From 614da60f8f2b2dc2a9a155575ff3eaba0dc9cd41 Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Tue, 11 Jun 2024 16:28:39 +0200 Subject: [PATCH 131/622] Simplify assertion --- src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java index 1afd0a55b0b..359fc49ed4e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SavedSearchIT.java @@ -18,6 +18,7 @@ import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; public class SavedSearchIT { @@ -225,7 +226,7 @@ public void testSavedSearches() { getListReponse.prettyPrint(); JsonPath path3 = JsonPath.from(getListReponse.body().asString()); List listAfterDelete = path3.getList("data.savedSearches"); - assertTrue(listBeforeDelete.size() - 1 == listAfterDelete.size()); + assertEquals(listBeforeDelete.size() - 1, listAfterDelete.size()); } public String createSavedSearchJson(String query, Integer creatorId, Integer definitionPointId, String... 
filterQueries) { From 68363663e381adb573016a261e06ea9e6d09063a Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Thu, 13 Jun 2024 10:31:02 +0200 Subject: [PATCH 132/622] Added suggested doc modification --- doc/sphinx-guides/source/api/native-api.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 71f51b962c8..2310acf3489 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5745,7 +5745,7 @@ The ``$identifier`` should start with an ``@`` if it's a user. Groups start with Saved Search ~~~~~~~~~~~~ -The Saved Search, Linked Dataverses, and Linked Datasets features are only accessible to superusers except for Linking a dataset. The following API endpoints were added to help people with access to the “admin” API make use of these features in their current form, keep in mind that they are partially experimental. +The Saved Search, Linked Dataverses, and Linked Datasets features are only accessible to superusers except for linking a dataset. The following API endpoints were added to help people with access to the "admin" API make use of these features in their current form. Keep in mind that they are partially experimental. List all saved searches. :: GET http://$SERVER/api/admin/savedsearches/list List a saved search by database id. :: GET http://$SERVER/api/admin/savedsearches/$id -Delete a saved search by database id. The ``unlink=true`` query parameter unlink links (Linked dataset or Dataverse collection) related to the deleted saved search. This parameter should be well considered as you cannot know if the saved search created the links or if someone else did via other API. Also, it may be followed ``/makelinks/all`` depending on the need if other saved searches could recreate some deleted links or by reindexing some Dataverse or Dataset. :: +Delete a saved search by database id:: + +The ``unlink=true`` query parameter unlinks all links (linked dataset or Dataverse collection) associated with the deleted saved search. Use of this parameter should be well considered as you cannot know if the links were created manually or by the saved search. After deleting a saved search with ``unlink=true``, we recommend running ``/makelinks/all`` just in case there was a dataset that was linked by another saved search. (Saved searches can link the same dataset.) Reindexing might be necessary as well. DELETE http://$SERVER/api/admin/savedsearches/$id?unlink=true From 633cd6c374a7831b7f55d952e07a787bb0c4a739 Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Thu, 13 Jun 2024 10:40:30 +0200 Subject: [PATCH 133/622] Try to fix guide doc issue --- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 2310acf3489..735c6747908 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5754,9 +5754,9 @@ List a saved search by database id. :: GET http://$SERVER/api/admin/savedsearches/$id -Delete a saved search by database id:: +Delete a saved search by database id. -The ``unlink=true`` query parameter unlinks all links (linked dataset or Dataverse collection) associated with the deleted saved search. 
Use of this parameter should be well considered as you cannot know if the links were created manually or by the saved search. After deleting a saved search with ``unlink=true``, we recommend running ``/makelinks/all`` just in case there was a dataset that was linked by another saved search. (Saved searches can link the same dataset.) Reindexing might be necessary as well. +The ``unlink=true`` query parameter unlinks all links (linked dataset or Dataverse collection) associated with the deleted saved search. Use of this parameter should be well considered as you cannot know if the links were created manually or by the saved search. After deleting a saved search with ``unlink=true``, we recommend running ``/makelinks/all`` just in case there was a dataset that was linked by another saved search. (Saved searches can link the same dataset.) Reindexing might be necessary as well.:: DELETE http://$SERVER/api/admin/savedsearches/$id?unlink=true From ea75216025ca358f1c9d7d3c11d324b6dbe4f0f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Jun 2024 12:58:46 -0400 Subject: [PATCH 134/622] only files in latestversionforcopy --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e6c1a1ae6b4..ae7c21b3308 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -43,6 +43,7 @@ import edu.harvard.iq.dataverse.DatasetRelPublication; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; @@ -934,10 +935,12 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th } } } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + List<FileMetadata> fmds = dataset.getLatestVersionForCopy().getFileMetadatas(); + if (fmds != null && !fmds.isEmpty()) { attributes.clear(); attributes.put("relationType", "HasPart"); - for (DataFile dataFile : dataset.getFiles()) { + for (FileMetadata fmd : fmds) { + DataFile dataFile = fmd.getDataFile(); GlobalId pid = dataFile.getGlobalId(); if (pid != null) { String pubIdType = getCanonicalPublicationType(pid.getProtocol()); From b6bd530db70dfbc78017445e4e2af79233b6e899 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 11 Jun 2024 16:40:58 -0400 Subject: [PATCH 135/622] fix date parsing, clear bad values --- .../dataverse/pidproviders/doi/XmlMetadataTemplate.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index ae7c21b3308..9ba1e4e3116 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -692,22 +692,27 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + //Minimal clean-up - useful?
Parse/format would remove unused chars, and an exception would clear the date so we don't send nonsense if(StringUtils.isNotBlank(startDate)) { try { - Date start = Util.getDateTimeFormat().parse(startDate); + Date start = Util.getDateFormat().parse(startDate); startDate = Util.getDateFormat().format(start); } catch (ParseException e) { logger.warning("Could not parse date: " + startDate); + startDate = null; } } if(StringUtils.isNotBlank(endDate)) { try { - Date end = Util.getDateTimeFormat().parse(endDate); + Date end = Util.getDateFormat().parse(endDate); endDate = Util.getDateFormat().format(end); } catch (ParseException e) { logger.warning("Could not parse date: " + endDate); + endDate = null; }; } + } + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); From e1383d77d3304418ebf3decfd257ca0610994ce1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:10:04 -0400 Subject: [PATCH 136/622] relationType entry in citation block --- conf/solr/9.3.0/schema.xml | 2 + ...dataset-create-new-all-default-fields.json | 2935 ++++++++--------- scripts/api/data/metadatablocks/citation.tsv | 105 +- .../iq/dataverse/DatasetFieldConstant.java | 1 + .../dublincore/DublinCoreExportUtil.java | 35 +- .../export/openaire/OpenAireExportUtil.java | 9 +- .../java/propertyFiles/citation.properties | 2 + .../export/OpenAireExportUtilTest.java | 2 +- 8 files changed, 1502 insertions(+), 1589 deletions(-) diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/9.3.0/schema.xml index 5dde750573d..32f10d0a621 100644 --- a/conf/solr/9.3.0/schema.xml +++ b/conf/solr/9.3.0/schema.xml @@ -349,6 +349,7 @@ + @@ -589,6 +590,7 @@ + diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index cc856c6372f..e522ab32b1d 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -1,1527 +1,1410 @@ { - "datasetVersion": { - "license": { - "name": "CC0 1.0", - "uri": "http://creativecommons.org/publicdomain/zero/1.0" - }, - "metadataBlocks": { - "citation": { - "displayName": "Citation Metadata", - "fields": [ - { - "typeName": "title", - "multiple": false, - "typeClass": "primitive", - "value": "Replication Data for: Title" - }, - { - "typeName": "subtitle", - "multiple": false, - "typeClass": "primitive", - "value": "Subtitle" - }, - { - "typeName": "alternativeTitle", - "multiple": true, - "typeClass": "primitive", - "value": ["Alternative Title"] - }, - { - "typeName": "alternativeURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://AlternativeURL.org" - }, - { - "typeName": "otherId", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "otherIdAgency": { - "typeName": "otherIdAgency", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDAgency1" - }, - "otherIdValue": { - "typeName": "otherIdValue", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDIdentifier1" - } - }, - { - "otherIdAgency": { - "typeName": "otherIdAgency", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDAgency2" - }, - "otherIdValue": { - "typeName": "otherIdValue", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDIdentifier2" - } - 
} - ] - }, - { - "typeName": "author", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastAuthor1, FirstAuthor1" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorAffiliation1" - }, - "authorIdentifierScheme": { - "typeName": "authorIdentifierScheme", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ORCID" - }, - "authorIdentifier": { - "typeName": "authorIdentifier", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorIdentifier1" - } - }, - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastAuthor2, FirstAuthor2" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorAffiliation2" - }, - "authorIdentifierScheme": { - "typeName": "authorIdentifierScheme", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ISNI" - }, - "authorIdentifier": { - "typeName": "authorIdentifier", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorIdentifier2" - } - } - ] - }, - { - "typeName": "datasetContact", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContact1, FirstContact1" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ContactAffiliation1" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "ContactEmail1@mailinator.com" - } - }, - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContact2, FirstContact2" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ContactAffiliation2" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "ContactEmail2@mailinator.com" - } - } - ] - }, - { - "typeName": "dsDescription", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "DescriptionText1" - }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1000-01-01" - } - }, - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "DescriptionText2" - }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1000-02-02" - } - } - ] - }, - { - "typeName": "subject", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Agricultural Sciences", - "Business and Management", - "Engineering", - "Law" - ] - }, - { - "typeName": "keyword", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordTerm1" - }, - "keywordTermURI": { - "typeName": "keywordTermURI", - "multiple": false, - 
"typeClass": "primitive", - "value": "http://keywordTermURI1.org" - }, - "keywordVocabulary": { - "typeName": "keywordVocabulary", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordVocabulary1" - }, - "keywordVocabularyURI": { - "typeName": "keywordVocabularyURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://KeywordVocabularyURL1.org" - } - }, - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordTerm2" - }, - "keywordTermURI": { - "typeName": "keywordTermURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://keywordTermURI2.org" - }, - "keywordVocabulary": { - "typeName": "keywordVocabulary", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordVocabulary2" - }, - "keywordVocabularyURI": { - "typeName": "keywordVocabularyURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://KeywordVocabularyURL2.org" - } - } - ] - }, - { - "typeName": "topicClassification", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "topicClassValue": { - "typeName": "topicClassValue", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Term1" - }, - "topicClassVocab": { - "typeName": "topicClassVocab", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Vocab1" - }, - "topicClassVocabURI": { - "typeName": "topicClassVocabURI", - "multiple": false, - "typeClass": "primitive", - "value": "https://TopicClassificationURL1.com" - } - }, - { - "topicClassValue": { - "typeName": "topicClassValue", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Term2" - }, - "topicClassVocab": { - "typeName": "topicClassVocab", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Vocab2" - }, - "topicClassVocabURI": { - "typeName": "topicClassVocabURI", - "multiple": false, - "typeClass": "primitive", - "value": "https://TopicClassificationURL2.com" - } - } - ] - }, - { - "typeName": "publication", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation1" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ark" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber1" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL1.org" - } - }, - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation2" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "arXiv" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber2" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL2.org" - } - } - ] - }, - { - "typeName": "notesText", - "multiple": false, - "typeClass": "primitive", - "value": "Notes1" - }, - { - "typeName": "language", - "multiple": true, 
- "typeClass": "controlledVocabulary", - "value": [ - "Abkhaz", - "Afar" - ] - }, - { - "typeName": "producer", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "LastProducer1, FirstProducer1" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAffiliation1" - }, - "producerAbbreviation": { - "typeName": "producerAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAbbreviation1" - }, - "producerURL": { - "typeName": "producerURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerURL1.org" - }, - "producerLogoURL": { - "typeName": "producerLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerLogoURL1.org" - } - }, - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "LastProducer2, FirstProducer2" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAffiliation2" - }, - "producerAbbreviation": { - "typeName": "producerAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAbbreviation2" - }, - "producerURL": { - "typeName": "producerURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerURL2.org" - }, - "producerLogoURL": { - "typeName": "producerLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerLogoURL2.org" - } - } - ] - }, - { - "typeName": "productionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1003-01-01" - }, - { - "typeName": "productionPlace", - "multiple": true, - "typeClass": "primitive", - "value": ["ProductionPlace"] - }, - { - "typeName": "contributor", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "contributorType": { - "typeName": "contributorType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Data Collector" - }, - "contributorName": { - "typeName": "contributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContributor1, FirstContributor1" - } - }, - { - "contributorType": { - "typeName": "contributorType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Data Curator" - }, - "contributorName": { - "typeName": "contributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContributor2, FirstContributor2" - } - } - ] - }, - { - "typeName": "grantNumber", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "grantNumberAgency": { - "typeName": "grantNumberAgency", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantAgency1" - }, - "grantNumberValue": { - "typeName": "grantNumberValue", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantNumber1" - } - }, - { - "grantNumberAgency": { - "typeName": "grantNumberAgency", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantAgency2" - }, - "grantNumberValue": { - "typeName": "grantNumberValue", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantNumber2" - } - } - ] - }, - { - "typeName": "distributor", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "distributorName": { - "typeName": "distributorName", - "multiple": 
false, - "typeClass": "primitive", - "value": "LastDistributor1, FirstDistributor1" - }, - "distributorAffiliation": { - "typeName": "distributorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAffiliation1" - }, - "distributorAbbreviation": { - "typeName": "distributorAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAbbreviation1" - }, - "distributorURL": { - "typeName": "distributorURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorURL1.org" - }, - "distributorLogoURL": { - "typeName": "distributorLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorLogoURL1.org" - } - }, - { - "distributorName": { - "typeName": "distributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastDistributor2, FirstDistributor2" - }, - "distributorAffiliation": { - "typeName": "distributorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAffiliation2" - }, - "distributorAbbreviation": { - "typeName": "distributorAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAbbreviation2" - }, - "distributorURL": { - "typeName": "distributorURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorURL2.org" - }, - "distributorLogoURL": { - "typeName": "distributorLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorLogoURL2.org" - } - } - ] - }, - { - "typeName": "distributionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1004-01-01" - }, - { - "typeName": "depositor", - "multiple": false, - "typeClass": "primitive", - "value": "LastDepositor, FirstDepositor" - }, - { - "typeName": "dateOfDeposit", - "multiple": false, - "typeClass": "primitive", - "value": "1002-01-01" - }, - { - "typeName": "timePeriodCovered", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "timePeriodCoveredStart": { - "typeName": "timePeriodCoveredStart", - "multiple": false, - "typeClass": "primitive", - "value": "1005-01-01" - }, - "timePeriodCoveredEnd": { - "typeName": "timePeriodCoveredEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1005-01-02" - } - }, - { - "timePeriodCoveredStart": { - "typeName": "timePeriodCoveredStart", - "multiple": false, - "typeClass": "primitive", - "value": "1005-02-01" - }, - "timePeriodCoveredEnd": { - "typeName": "timePeriodCoveredEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1005-02-02" - } - } - ] - }, - { - "typeName": "dateOfCollection", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dateOfCollectionStart": { - "typeName": "dateOfCollectionStart", - "multiple": false, - "typeClass": "primitive", - "value": "1006-01-01" - }, - "dateOfCollectionEnd": { - "typeName": "dateOfCollectionEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1006-01-01" - } - }, - { - "dateOfCollectionStart": { - "typeName": "dateOfCollectionStart", - "multiple": false, - "typeClass": "primitive", - "value": "1006-02-01" - }, - "dateOfCollectionEnd": { - "typeName": "dateOfCollectionEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1006-02-02" - } - } - ] - }, - { - "typeName": "kindOfData", - "multiple": true, - "typeClass": "primitive", - "value": [ - "KindOfData1", - "KindOfData2" - ] - }, - { - "typeName": "series", - "multiple": true, - "typeClass": "compound", - "value": [{ - "seriesName": { - "typeName": 
"seriesName", - "multiple": false, - "typeClass": "primitive", - "value": "SeriesName" - }, - "seriesInformation": { - "typeName": "seriesInformation", - "multiple": false, - "typeClass": "primitive", - "value": "SeriesInformation" - } - }] - }, - { - "typeName": "software", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "softwareName": { - "typeName": "softwareName", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareName1" - }, - "softwareVersion": { - "typeName": "softwareVersion", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareVersion1" - } - }, - { - "softwareName": { - "typeName": "softwareName", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareName2" - }, - "softwareVersion": { - "typeName": "softwareVersion", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareVersion2" - } - } - ] - }, - { - "typeName": "relatedMaterial", - "multiple": true, - "typeClass": "primitive", - "value": [ - "RelatedMaterial1", - "RelatedMaterial2" - ] - }, - { - "typeName": "relatedDatasets", - "multiple": true, - "typeClass": "primitive", - "value": [ - "RelatedDatasets1", - "RelatedDatasets2" - ] - }, - { - "typeName": "otherReferences", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherReferences1", - "OtherReferences2" - ] - }, - { - "typeName": "dataSources", - "multiple": true, - "typeClass": "primitive", - "value": [ - "DataSources1", - "DataSources2" - ] - }, - { - "typeName": "originOfSources", - "multiple": false, - "typeClass": "primitive", - "value": "OriginOfSources" - }, - { - "typeName": "characteristicOfSources", - "multiple": false, - "typeClass": "primitive", - "value": "CharacteristicOfSourcesNoted" - }, - { - "typeName": "accessToSources", - "multiple": false, - "typeClass": "primitive", - "value": "DocumentationAndAccessToSources" - } - ] - }, - "geospatial": { - "displayName": "Geospatial Metadata", - "fields": [ - { - "typeName": "geographicCoverage", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "country": { - "typeName": "country", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Afghanistan" - }, - "state": { - "typeName": "state", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageStateProvince1" - }, - "city": { - "typeName": "city", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageCity1" - }, - "otherGeographicCoverage": { - "typeName": "otherGeographicCoverage", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageOther1" - } - }, - { - "country": { - "typeName": "country", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Albania" - }, - "state": { - "typeName": "state", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageStateProvince2" - }, - "city": { - "typeName": "city", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageCity2" - }, - "otherGeographicCoverage": { - "typeName": "otherGeographicCoverage", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageOther2" - } - } - ] - }, - { - "typeName": "geographicUnit", - "multiple": true, - "typeClass": "primitive", - "value": [ - "GeographicUnit1", - "GeographicUnit2" - ] - }, - { - "typeName": "geographicBoundingBox", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "westLongitude": { - "typeName": "westLongitude", - "multiple": false, - "typeClass": "primitive", - 
"value": "-72" - }, - "eastLongitude": { - "typeName": "eastLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-70" - }, - "northLatitude": { - "typeName": "northLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "43" - }, - "southLatitude": { - "typeName": "southLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "42" - } - }, - { - "westLongitude": { - "typeName": "westLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-18" - }, - "eastLongitude": { - "typeName": "eastLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-13" - }, - "northLatitude": { - "typeName": "northLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "29" - }, - "southLatitude": { - "typeName": "southLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "28" - } - } - ] - } - ] - }, - "socialscience": { - "displayName": "Social Science and Humanities Metadata", - "fields": [ - { - "typeName": "unitOfAnalysis", - "multiple": true, - "typeClass": "primitive", - "value": [ - "UnitOfAnalysis1", - "UnitOfAnalysis2" - ] - }, - { - "typeName": "universe", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Universe1", - "Universe2" - ] - }, - { - "typeName": "timeMethod", - "multiple": false, - "typeClass": "primitive", - "value": "TimeMethod" - }, - { - "typeName": "dataCollector", - "multiple": false, - "typeClass": "primitive", - "value": "LastDataCollector1, FirstDataCollector1" - }, - { - "typeName": "collectorTraining", - "multiple": false, - "typeClass": "primitive", - "value": "CollectorTraining" - }, - { - "typeName": "frequencyOfDataCollection", - "multiple": false, - "typeClass": "primitive", - "value": "Frequency" - }, - { - "typeName": "samplingProcedure", - "multiple": false, - "typeClass": "primitive", - "value": "SamplingProcedure" - }, - { - "typeName": "targetSampleSize", - "multiple": false, - "typeClass": "compound", - "value": { - "targetSampleActualSize": { - "typeName": "targetSampleActualSize", - "multiple": false, - "typeClass": "primitive", - "value": "100" - }, - "targetSampleSizeFormula": { - "typeName": "targetSampleSizeFormula", - "multiple": false, - "typeClass": "primitive", - "value": "TargetSampleSizeFormula" - } - } - }, - { - "typeName": "deviationsFromSampleDesign", - "multiple": false, - "typeClass": "primitive", - "value": "MajorDeviationsForSampleDesign" - }, - { - "typeName": "collectionMode", - "multiple": true, - "typeClass": "primitive", - "value": ["CollectionMode"] - }, - { - "typeName": "researchInstrument", - "multiple": false, - "typeClass": "primitive", - "value": "TypeOfResearchInstrument" - }, - { - "typeName": "dataCollectionSituation", - "multiple": false, - "typeClass": "primitive", - "value": "CharacteristicsOfDataCollectionSituation" - }, - { - "typeName": "actionsToMinimizeLoss", - "multiple": false, - "typeClass": "primitive", - "value": "ActionsToMinimizeLosses" - }, - { - "typeName": "controlOperations", - "multiple": false, - "typeClass": "primitive", - "value": "ControlOperations" - }, - { - "typeName": "weighting", - "multiple": false, - "typeClass": "primitive", - "value": "Weighting" - }, - { - "typeName": "cleaningOperations", - "multiple": false, - "typeClass": "primitive", - "value": "CleaningOperations" - }, - { - "typeName": "datasetLevelErrorNotes", - "multiple": false, - "typeClass": "primitive", - "value": "StudyLevelErrorNotes" - }, - { - "typeName": "responseRate", - "multiple": false, - "typeClass": 
"primitive", - "value": "ResponseRate" - }, - { - "typeName": "samplingErrorEstimates", - "multiple": false, - "typeClass": "primitive", - "value": "EstimatesOfSamplingError" - }, - { - "typeName": "otherDataAppraisal", - "multiple": false, - "typeClass": "primitive", - "value": "OtherFormsOfDataAppraisal" - }, - { - "typeName": "socialScienceNotes", - "multiple": false, - "typeClass": "compound", - "value": { - "socialScienceNotesType": { - "typeName": "socialScienceNotesType", - "multiple": false, - "typeClass": "primitive", - "value": "NotesType" - }, - "socialScienceNotesSubject": { - "typeName": "socialScienceNotesSubject", - "multiple": false, - "typeClass": "primitive", - "value": "NotesSubject" - }, - "socialScienceNotesText": { - "typeName": "socialScienceNotesText", - "multiple": false, - "typeClass": "primitive", - "value": "NotesText" - } - } - } - ] - }, - "astrophysics": { - "displayName": "Astronomy and Astrophysics Metadata", - "fields": [ - { - "typeName": "astroType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Image", - "Mosaic", - "EventList", - "Cube" - ] - }, - { - "typeName": "astroFacility", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Facility1", - "Facility2" - ] - }, - { - "typeName": "astroInstrument", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Instrument1", - "Instrument2" - ] - }, - { - "typeName": "astroObject", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Object1", - "Object2" - ] - }, - { - "typeName": "resolution.Spatial", - "multiple": false, - "typeClass": "primitive", - "value": "SpatialResolution" - }, - { - "typeName": "resolution.Spectral", - "multiple": false, - "typeClass": "primitive", - "value": "SpectralResolution" - }, - { - "typeName": "resolution.Temporal", - "multiple": false, - "typeClass": "primitive", - "value": "TimeResolution" - }, - { - "typeName": "coverage.Spectral.Bandpass", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Bandpass1", - "Bandpass2" - ] - }, - { - "typeName": "coverage.Spectral.CentralWavelength", - "multiple": true, - "typeClass": "primitive", - "value": [ - "3001", - "3002" - ] - }, - { - "typeName": "coverage.Spectral.Wavelength", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Spectral.MinimumWavelength": { - "typeName": "coverage.Spectral.MinimumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4001" - }, - "coverage.Spectral.MaximumWavelength": { - "typeName": "coverage.Spectral.MaximumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4002" - } - }, - { - "coverage.Spectral.MinimumWavelength": { - "typeName": "coverage.Spectral.MinimumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4003" - }, - "coverage.Spectral.MaximumWavelength": { - "typeName": "coverage.Spectral.MaximumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4004" - } - } - ] - }, - { - "typeName": "coverage.Temporal", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Temporal.StartTime": { - "typeName": "coverage.Temporal.StartTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-01-01" - }, - "coverage.Temporal.StopTime": { - "typeName": "coverage.Temporal.StopTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-01-02" - } - }, - { - "coverage.Temporal.StartTime": { - "typeName": "coverage.Temporal.StartTime", - "multiple": false, - "typeClass": 
"primitive", - "value": "1007-02-01" - }, - "coverage.Temporal.StopTime": { - "typeName": "coverage.Temporal.StopTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-02-02" - } - } - ] - }, - { - "typeName": "coverage.Spatial", - "multiple": true, - "typeClass": "primitive", - "value": [ - "SkyCoverage1", - "SkyCoverage2" - ] - }, - { - "typeName": "coverage.Depth", - "multiple": false, - "typeClass": "primitive", - "value": "200" - }, - { - "typeName": "coverage.ObjectDensity", - "multiple": false, - "typeClass": "primitive", - "value": "300" - }, - { - "typeName": "coverage.ObjectCount", - "multiple": false, - "typeClass": "primitive", - "value": "400" - }, - { - "typeName": "coverage.SkyFraction", - "multiple": false, - "typeClass": "primitive", - "value": "500" - }, - { - "typeName": "coverage.Polarization", - "multiple": false, - "typeClass": "primitive", - "value": "Polarization" - }, - { - "typeName": "redshiftType", - "multiple": false, - "typeClass": "primitive", - "value": "RedshiftType" - }, - { - "typeName": "resolution.Redshift", - "multiple": false, - "typeClass": "primitive", - "value": "600" - }, - { - "typeName": "coverage.RedshiftValue", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Redshift.MinimumValue": { - "typeName": "coverage.Redshift.MinimumValue", - "multiple": false, - "typeClass": "primitive", - "value": "701" - }, - "coverage.Redshift.MaximumValue": { - "typeName": "coverage.Redshift.MaximumValue", - "multiple": false, - "typeClass": "primitive", - "value": "702" - } - }, - { - "coverage.Redshift.MinimumValue": { - "typeName": "coverage.Redshift.MinimumValue", - "multiple": false, - "typeClass": "primitive", - "value": "703" - }, - "coverage.Redshift.MaximumValue": { - "typeName": "coverage.Redshift.MaximumValue", - "multiple": false, - "typeClass": "primitive", - "value": "704" - } - } - ] - } - ] - }, - "biomedical": { - "displayName": "Life Sciences Metadata", - "fields": [ - { - "typeName": "studyDesignType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Case Control", - "Cross Sectional", - "Cohort Study", - "Not Specified" - ] - }, - { - "typeName": "studyFactorType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Age", - "Biomarkers", - "Cell Surface Markers", - "Developmental Stage" - ] - }, - { - "typeName": "studyAssayOrganism", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Arabidopsis thaliana", - "Bos taurus", - "Caenorhabditis elegans", - "Danio rerio (zebrafish)" - ] - }, - { - "typeName": "studyAssayOtherOrganism", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherOrganism1", - "OtherOrganism2" - ] - }, - { - "typeName": "studyAssayMeasurementType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "genome sequencing", - "cell sorting", - "clinical chemistry analysis", - "DNA methylation profiling" - ] - }, - { - "typeName": "studyAssayOtherMeasurmentType", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherMeasurementType1", - "OtherMeasurementType2" - ] - }, - { - "typeName": "studyAssayTechnologyType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "culture based drug susceptibility testing, single concentration", - "culture based drug susceptibility testing, two concentrations", - "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", - "flow cytometry" - 
] - }, - { - "typeName": "studyAssayPlatform", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "210-MS GC Ion Trap (Varian)", - "220-MS GC Ion Trap (Varian)", - "225-MS GC Ion Trap (Varian)", - "300-MS quadrupole GC/MS (Varian)" - ] - }, - { - "typeName": "studyAssayCellType", - "multiple": true, - "typeClass": "primitive", - "value": [ - "CellType1", - "CellType2" - ] - } - ] - }, - "journal": { - "displayName": "Journal Metadata", - "fields": [ - { - "typeName": "journalVolumeIssue", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "journalVolume": { - "typeName": "journalVolume", - "multiple": false, - "typeClass": "primitive", - "value": "JournalVolume1" - }, - "journalIssue": { - "typeName": "journalIssue", - "multiple": false, - "typeClass": "primitive", - "value": "JournalIssue1" - }, - "journalPubDate": { - "typeName": "journalPubDate", - "multiple": false, - "typeClass": "primitive", - "value": "1008-01-01" - } - }, - { - "journalVolume": { - "typeName": "journalVolume", - "multiple": false, - "typeClass": "primitive", - "value": "JournalVolume2" - }, - "journalIssue": { - "typeName": "journalIssue", - "multiple": false, - "typeClass": "primitive", - "value": "JournalIssue2" - }, - "journalPubDate": { - "typeName": "journalPubDate", - "multiple": false, - "typeClass": "primitive", - "value": "1008-02-01" - } - } - ] - }, - { - "typeName": "journalArticleType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "abstract" - } - ] - } - } - } -} + "datasetVersion" : { + "license" : { + "name" : "CC0 1.0", + "uri" : "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks" : { + "citation" : { + "displayName" : "Citation Metadata", + "fields" : [{ + "typeName" : "title", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Replication Data for: Title" + }, { + "typeName" : "subtitle", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Subtitle" + }, { + "typeName" : "alternativeTitle", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Alternative Title" + ] + }, { + "typeName" : "alternativeURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://AlternativeURL.org" + }, { + "typeName" : "otherId", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "otherIdAgency" : { + "typeName" : "otherIdAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDAgency1" + }, + "otherIdValue" : { + "typeName" : "otherIdValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDIdentifier1" + } + }, { + "otherIdAgency" : { + "typeName" : "otherIdAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDAgency2" + }, + "otherIdValue" : { + "typeName" : "otherIdValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDIdentifier2" + } + } + ] + }, { + "typeName" : "author", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "authorName" : { + "typeName" : "authorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastAuthor1, FirstAuthor1" + }, + "authorAffiliation" : { + "typeName" : "authorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorAffiliation1" + }, + "authorIdentifierScheme" : { + "typeName" : "authorIdentifierScheme", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ORCID" + }, + "authorIdentifier" : { + "typeName" : "authorIdentifier", + "multiple" : false, + 
"typeClass" : "primitive", + "value" : "AuthorIdentifier1" + } + }, { + "authorName" : { + "typeName" : "authorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastAuthor2, FirstAuthor2" + }, + "authorAffiliation" : { + "typeName" : "authorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorAffiliation2" + }, + "authorIdentifierScheme" : { + "typeName" : "authorIdentifierScheme", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ISNI" + }, + "authorIdentifier" : { + "typeName" : "authorIdentifier", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorIdentifier2" + } + } + ] + }, { + "typeName" : "datasetContact", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "datasetContactName" : { + "typeName" : "datasetContactName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContact1, FirstContact1" + }, + "datasetContactAffiliation" : { + "typeName" : "datasetContactAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactAffiliation1" + }, + "datasetContactEmail" : { + "typeName" : "datasetContactEmail", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactEmail1@mailinator.com" + } + }, { + "datasetContactName" : { + "typeName" : "datasetContactName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContact2, FirstContact2" + }, + "datasetContactAffiliation" : { + "typeName" : "datasetContactAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactAffiliation2" + }, + "datasetContactEmail" : { + "typeName" : "datasetContactEmail", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactEmail2@mailinator.com" + } + } + ] + }, { + "typeName" : "dsDescription", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "dsDescriptionValue" : { + "typeName" : "dsDescriptionValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DescriptionText1" + }, + "dsDescriptionDate" : { + "typeName" : "dsDescriptionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1000-01-01" + } + }, { + "dsDescriptionValue" : { + "typeName" : "dsDescriptionValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DescriptionText2" + }, + "dsDescriptionDate" : { + "typeName" : "dsDescriptionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1000-02-02" + } + } + ] + }, { + "typeName" : "subject", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Agricultural Sciences", + "Business and Management", + "Engineering", + "Law" + ] + }, { + "typeName" : "keyword", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "keywordValue" : { + "typeName" : "keywordValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordTerm1" + }, + "keywordTermURI" : { + "typeName" : "keywordTermURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://keywordTermURI1.org" + }, + "keywordVocabulary" : { + "typeName" : "keywordVocabulary", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordVocabulary1" + }, + "keywordVocabularyURI" : { + "typeName" : "keywordVocabularyURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://KeywordVocabularyURL1.org" + } + }, { + "keywordValue" : { + "typeName" : "keywordValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordTerm2" + }, + "keywordTermURI" : { + 
"typeName" : "keywordTermURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://keywordTermURI2.org" + }, + "keywordVocabulary" : { + "typeName" : "keywordVocabulary", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordVocabulary2" + }, + "keywordVocabularyURI" : { + "typeName" : "keywordVocabularyURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://KeywordVocabularyURL2.org" + } + } + ] + }, { + "typeName" : "topicClassification", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "topicClassValue" : { + "typeName" : "topicClassValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Term1" + }, + "topicClassVocab" : { + "typeName" : "topicClassVocab", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Vocab1" + }, + "topicClassVocabURI" : { + "typeName" : "topicClassVocabURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "https://TopicClassificationURL1.com" + } + }, { + "topicClassValue" : { + "typeName" : "topicClassValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Term2" + }, + "topicClassVocab" : { + "typeName" : "topicClassVocab", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Vocab2" + }, + "topicClassVocabURI" : { + "typeName" : "topicClassVocabURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "https://TopicClassificationURL2.com" + } + } + ] + }, { + "typeName" : "publication", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "publicationRelationType" : { + "typeName" : "publicationRelationType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "IsSupplementTo" + }, + "publicationCitation" : { + "typeName" : "publicationCitation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationCitation1" + }, + "publicationIDType" : { + "typeName" : "publicationIDType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ark" + }, + "publicationIDNumber" : { + "typeName" : "publicationIDNumber", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationIDNumber1" + }, + "publicationURL" : { + "typeName" : "publicationURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://RelatedPublicationURL1.org" + } + }, { + "publicationCitation" : { + "typeName" : "publicationCitation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationCitation2" + }, + "publicationIDType" : { + "typeName" : "publicationIDType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "arXiv" + }, + "publicationIDNumber" : { + "typeName" : "publicationIDNumber", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationIDNumber2" + }, + "publicationURL" : { + "typeName" : "publicationURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://RelatedPublicationURL2.org" + } + } + ] + }, { + "typeName" : "notesText", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Notes1" + }, { + "typeName" : "language", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Abkhaz", + "Afar" + ] + }, { + "typeName" : "producer", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "producerName" : { + "typeName" : "producerName", + "multiple" : false, + "typeClass" : "primitive", + "value" : 
"LastProducer1, FirstProducer1" + }, + "producerAffiliation" : { + "typeName" : "producerAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAffiliation1" + }, + "producerAbbreviation" : { + "typeName" : "producerAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAbbreviation1" + }, + "producerURL" : { + "typeName" : "producerURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerURL1.org" + }, + "producerLogoURL" : { + "typeName" : "producerLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerLogoURL1.org" + } + }, { + "producerName" : { + "typeName" : "producerName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastProducer2, FirstProducer2" + }, + "producerAffiliation" : { + "typeName" : "producerAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAffiliation2" + }, + "producerAbbreviation" : { + "typeName" : "producerAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAbbreviation2" + }, + "producerURL" : { + "typeName" : "producerURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerURL2.org" + }, + "producerLogoURL" : { + "typeName" : "producerLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerLogoURL2.org" + } + } + ] + }, { + "typeName" : "productionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1003-01-01" + }, { + "typeName" : "productionPlace", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "ProductionPlace" + ] + }, { + "typeName" : "contributor", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "contributorType" : { + "typeName" : "contributorType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Data Collector" + }, + "contributorName" : { + "typeName" : "contributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContributor1, FirstContributor1" + } + }, { + "contributorType" : { + "typeName" : "contributorType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Data Curator" + }, + "contributorName" : { + "typeName" : "contributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContributor2, FirstContributor2" + } + } + ] + }, { + "typeName" : "grantNumber", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "grantNumberAgency" : { + "typeName" : "grantNumberAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantAgency1" + }, + "grantNumberValue" : { + "typeName" : "grantNumberValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantNumber1" + } + }, { + "grantNumberAgency" : { + "typeName" : "grantNumberAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantAgency2" + }, + "grantNumberValue" : { + "typeName" : "grantNumberValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantNumber2" + } + } + ] + }, { + "typeName" : "distributor", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "distributorName" : { + "typeName" : "distributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDistributor1, FirstDistributor1" + }, + "distributorAffiliation" : { + "typeName" : "distributorAffiliation", + "multiple" : false, + 
"typeClass" : "primitive", + "value" : "DistributorAffiliation1" + }, + "distributorAbbreviation" : { + "typeName" : "distributorAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAbbreviation1" + }, + "distributorURL" : { + "typeName" : "distributorURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorURL1.org" + }, + "distributorLogoURL" : { + "typeName" : "distributorLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorLogoURL1.org" + } + }, { + "distributorName" : { + "typeName" : "distributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDistributor2, FirstDistributor2" + }, + "distributorAffiliation" : { + "typeName" : "distributorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAffiliation2" + }, + "distributorAbbreviation" : { + "typeName" : "distributorAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAbbreviation2" + }, + "distributorURL" : { + "typeName" : "distributorURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorURL2.org" + }, + "distributorLogoURL" : { + "typeName" : "distributorLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorLogoURL2.org" + } + } + ] + }, { + "typeName" : "distributionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1004-01-01" + }, { + "typeName" : "depositor", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDepositor, FirstDepositor" + }, { + "typeName" : "dateOfDeposit", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1002-01-01" + }, { + "typeName" : "timePeriodCovered", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "timePeriodCoveredStart" : { + "typeName" : "timePeriodCoveredStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-01-01" + }, + "timePeriodCoveredEnd" : { + "typeName" : "timePeriodCoveredEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-01-02" + } + }, { + "timePeriodCoveredStart" : { + "typeName" : "timePeriodCoveredStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-02-01" + }, + "timePeriodCoveredEnd" : { + "typeName" : "timePeriodCoveredEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-02-02" + } + } + ] + }, { + "typeName" : "dateOfCollection", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "dateOfCollectionStart" : { + "typeName" : "dateOfCollectionStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-01-01" + }, + "dateOfCollectionEnd" : { + "typeName" : "dateOfCollectionEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-01-01" + } + }, { + "dateOfCollectionStart" : { + "typeName" : "dateOfCollectionStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-02-01" + }, + "dateOfCollectionEnd" : { + "typeName" : "dateOfCollectionEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-02-02" + } + } + ] + }, { + "typeName" : "kindOfData", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "KindOfData1", + "KindOfData2" + ] + }, { + "typeName" : "series", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "seriesName" : { + "typeName" : "seriesName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SeriesName" + }, + 
"seriesInformation" : { + "typeName" : "seriesInformation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SeriesInformation" + } + } + ] + }, { + "typeName" : "software", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "softwareName" : { + "typeName" : "softwareName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareName1" + }, + "softwareVersion" : { + "typeName" : "softwareVersion", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareVersion1" + } + }, { + "softwareName" : { + "typeName" : "softwareName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareName2" + }, + "softwareVersion" : { + "typeName" : "softwareVersion", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareVersion2" + } + } + ] + }, { + "typeName" : "relatedMaterial", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "RelatedMaterial1", + "RelatedMaterial2" + ] + }, { + "typeName" : "relatedDatasets", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "RelatedDatasets1", + "RelatedDatasets2" + ] + }, { + "typeName" : "otherReferences", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherReferences1", + "OtherReferences2" + ] + }, { + "typeName" : "dataSources", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "DataSources1", + "DataSources2" + ] + }, { + "typeName" : "originOfSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OriginOfSources" + }, { + "typeName" : "characteristicOfSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CharacteristicOfSourcesNoted" + }, { + "typeName" : "accessToSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DocumentationAndAccessToSources" + } + ] + }, + "geospatial" : { + "displayName" : "Geospatial Metadata", + "fields" : [{ + "typeName" : "geographicCoverage", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "country" : { + "typeName" : "country", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Afghanistan" + }, + "state" : { + "typeName" : "state", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageStateProvince1" + }, + "city" : { + "typeName" : "city", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageCity1" + }, + "otherGeographicCoverage" : { + "typeName" : "otherGeographicCoverage", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageOther1" + } + }, { + "country" : { + "typeName" : "country", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Albania" + }, + "state" : { + "typeName" : "state", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageStateProvince2" + }, + "city" : { + "typeName" : "city", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageCity2" + }, + "otherGeographicCoverage" : { + "typeName" : "otherGeographicCoverage", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageOther2" + } + } + ] + }, { + "typeName" : "geographicUnit", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "GeographicUnit1", + "GeographicUnit2" + ] + }, { + "typeName" : "geographicBoundingBox", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "westLongitude" : { + "typeName" : "westLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : 
"-72" + }, + "eastLongitude" : { + "typeName" : "eastLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-70" + }, + "northLatitude" : { + "typeName" : "northLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "43" + }, + "southLatitude" : { + "typeName" : "southLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "42" + } + }, { + "westLongitude" : { + "typeName" : "westLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-18" + }, + "eastLongitude" : { + "typeName" : "eastLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-13" + }, + "northLatitude" : { + "typeName" : "northLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "29" + }, + "southLatitude" : { + "typeName" : "southLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "28" + } + } + ] + } + ] + }, + "socialscience" : { + "displayName" : "Social Science and Humanities Metadata", + "fields" : [{ + "typeName" : "unitOfAnalysis", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "UnitOfAnalysis1", + "UnitOfAnalysis2" + ] + }, { + "typeName" : "universe", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Universe1", + "Universe2" + ] + }, { + "typeName" : "timeMethod", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TimeMethod" + }, { + "typeName" : "dataCollector", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDataCollector1, FirstDataCollector1" + }, { + "typeName" : "collectorTraining", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CollectorTraining" + }, { + "typeName" : "frequencyOfDataCollection", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Frequency" + }, { + "typeName" : "samplingProcedure", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SamplingProcedure" + }, { + "typeName" : "targetSampleSize", + "multiple" : false, + "typeClass" : "compound", + "value" : { + "targetSampleActualSize" : { + "typeName" : "targetSampleActualSize", + "multiple" : false, + "typeClass" : "primitive", + "value" : "100" + }, + "targetSampleSizeFormula" : { + "typeName" : "targetSampleSizeFormula", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TargetSampleSizeFormula" + } + } + }, { + "typeName" : "deviationsFromSampleDesign", + "multiple" : false, + "typeClass" : "primitive", + "value" : "MajorDeviationsForSampleDesign" + }, { + "typeName" : "collectionMode", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "CollectionMode" + ] + }, { + "typeName" : "researchInstrument", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TypeOfResearchInstrument" + }, { + "typeName" : "dataCollectionSituation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CharacteristicsOfDataCollectionSituation" + }, { + "typeName" : "actionsToMinimizeLoss", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ActionsToMinimizeLosses" + }, { + "typeName" : "controlOperations", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ControlOperations" + }, { + "typeName" : "weighting", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Weighting" + }, { + "typeName" : "cleaningOperations", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CleaningOperations" + }, { + "typeName" : "datasetLevelErrorNotes", + "multiple" : false, + "typeClass" : "primitive", + "value" : "StudyLevelErrorNotes" 
+ }, { + "typeName" : "responseRate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ResponseRate" + }, { + "typeName" : "samplingErrorEstimates", + "multiple" : false, + "typeClass" : "primitive", + "value" : "EstimatesOfSamplingError" + }, { + "typeName" : "otherDataAppraisal", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherFormsOfDataAppraisal" + }, { + "typeName" : "socialScienceNotes", + "multiple" : false, + "typeClass" : "compound", + "value" : { + "socialScienceNotesType" : { + "typeName" : "socialScienceNotesType", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesType" + }, + "socialScienceNotesSubject" : { + "typeName" : "socialScienceNotesSubject", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesSubject" + }, + "socialScienceNotesText" : { + "typeName" : "socialScienceNotesText", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesText" + } + } + } + ] + }, + "astrophysics" : { + "displayName" : "Astronomy and Astrophysics Metadata", + "fields" : [{ + "typeName" : "astroType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Image", + "Mosaic", + "EventList", + "Cube" + ] + }, { + "typeName" : "astroFacility", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Facility1", + "Facility2" + ] + }, { + "typeName" : "astroInstrument", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Instrument1", + "Instrument2" + ] + }, { + "typeName" : "astroObject", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Object1", + "Object2" + ] + }, { + "typeName" : "resolution.Spatial", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SpatialResolution" + }, { + "typeName" : "resolution.Spectral", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SpectralResolution" + }, { + "typeName" : "resolution.Temporal", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TimeResolution" + }, { + "typeName" : "coverage.Spectral.Bandpass", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Bandpass1", + "Bandpass2" + ] + }, { + "typeName" : "coverage.Spectral.CentralWavelength", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "3001", + "3002" + ] + }, { + "typeName" : "coverage.Spectral.Wavelength", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Spectral.MinimumWavelength" : { + "typeName" : "coverage.Spectral.MinimumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4001" + }, + "coverage.Spectral.MaximumWavelength" : { + "typeName" : "coverage.Spectral.MaximumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4002" + } + }, { + "coverage.Spectral.MinimumWavelength" : { + "typeName" : "coverage.Spectral.MinimumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4003" + }, + "coverage.Spectral.MaximumWavelength" : { + "typeName" : "coverage.Spectral.MaximumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4004" + } + } + ] + }, { + "typeName" : "coverage.Temporal", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Temporal.StartTime" : { + "typeName" : "coverage.Temporal.StartTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-01-01" + }, + "coverage.Temporal.StopTime" : { + "typeName" : "coverage.Temporal.StopTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : 
"1007-01-02" + } + }, { + "coverage.Temporal.StartTime" : { + "typeName" : "coverage.Temporal.StartTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-02-01" + }, + "coverage.Temporal.StopTime" : { + "typeName" : "coverage.Temporal.StopTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-02-02" + } + } + ] + }, { + "typeName" : "coverage.Spatial", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "SkyCoverage1", + "SkyCoverage2" + ] + }, { + "typeName" : "coverage.Depth", + "multiple" : false, + "typeClass" : "primitive", + "value" : "200" + }, { + "typeName" : "coverage.ObjectDensity", + "multiple" : false, + "typeClass" : "primitive", + "value" : "300" + }, { + "typeName" : "coverage.ObjectCount", + "multiple" : false, + "typeClass" : "primitive", + "value" : "400" + }, { + "typeName" : "coverage.SkyFraction", + "multiple" : false, + "typeClass" : "primitive", + "value" : "500" + }, { + "typeName" : "coverage.Polarization", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Polarization" + }, { + "typeName" : "redshiftType", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RedshiftType" + }, { + "typeName" : "resolution.Redshift", + "multiple" : false, + "typeClass" : "primitive", + "value" : "600" + }, { + "typeName" : "coverage.RedshiftValue", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Redshift.MinimumValue" : { + "typeName" : "coverage.Redshift.MinimumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "701" + }, + "coverage.Redshift.MaximumValue" : { + "typeName" : "coverage.Redshift.MaximumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "702" + } + }, { + "coverage.Redshift.MinimumValue" : { + "typeName" : "coverage.Redshift.MinimumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "703" + }, + "coverage.Redshift.MaximumValue" : { + "typeName" : "coverage.Redshift.MaximumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "704" + } + } + ] + } + ] + }, + "biomedical" : { + "displayName" : "Life Sciences Metadata", + "fields" : [{ + "typeName" : "studyDesignType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Case Control", + "Cross Sectional", + "Cohort Study", + "Not Specified" + ] + }, { + "typeName" : "studyFactorType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Age", + "Biomarkers", + "Cell Surface Markers", + "Developmental Stage" + ] + }, { + "typeName" : "studyAssayOrganism", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Arabidopsis thaliana", + "Bos taurus", + "Caenorhabditis elegans", + "Danio rerio (zebrafish)" + ] + }, { + "typeName" : "studyAssayOtherOrganism", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherOrganism1", + "OtherOrganism2" + ] + }, { + "typeName" : "studyAssayMeasurementType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "genome sequencing", + "cell sorting", + "clinical chemistry analysis", + "DNA methylation profiling" + ] + }, { + "typeName" : "studyAssayOtherMeasurmentType", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherMeasurementType1", + "OtherMeasurementType2" + ] + }, { + "typeName" : "studyAssayTechnologyType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "culture based drug susceptibility testing, single concentration", + "culture 
based drug susceptibility testing, two concentrations", + "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", + "flow cytometry" + ] + }, { + "typeName" : "studyAssayPlatform", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "210-MS GC Ion Trap (Varian)", + "220-MS GC Ion Trap (Varian)", + "225-MS GC Ion Trap (Varian)", + "300-MS quadrupole GC/MS (Varian)" + ] + }, { + "typeName" : "studyAssayCellType", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "CellType1", + "CellType2" + ] + } + ] + }, + "journal" : { + "displayName" : "Journal Metadata", + "fields" : [{ + "typeName" : "journalVolumeIssue", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "journalVolume" : { + "typeName" : "journalVolume", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalVolume1" + }, + "journalIssue" : { + "typeName" : "journalIssue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalIssue1" + }, + "journalPubDate" : { + "typeName" : "journalPubDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1008-01-01" + } + }, { + "journalVolume" : { + "typeName" : "journalVolume", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalVolume2" + }, + "journalIssue" : { + "typeName" : "journalIssue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalIssue2" + }, + "journalPubDate" : { + "typeName" : "journalPubDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1008-02-01" + } + } + ] + }, { + "typeName" : "journalArticleType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "abstract" + } + ] + } + } + } +} \ No newline at end of file diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 18354f2b1f7..db4f1c9f30c 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -31,55 +31,56 @@ topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. 
Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + publicationRelationType Relation Type The nature of the relationship between this Dataset and the related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://datacite.org/schema/kernel-4/simpleTypes#relationType + publicationCitation Citation The full bibliographic citation for the related publication textbox 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 32 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 33 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 34 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 35 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 36 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 37 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 38 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 40 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 41 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 42
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 43 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 44 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 45 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 46 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 48 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 51 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 52 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 54 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 55 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 56
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 57 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 60 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 62 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 63 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 65 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 66 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 67 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 68 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 70 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 71 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 72 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 75 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 76 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 79 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -325,3 +326,9 @@ language Zhuang, Chuang zha 183 zha za Zhuang Chuang language Zulu zul 184 zul zu language Not applicable 185 + publicationRelationType IsCitedBy RT1 1 + publicationRelationType Cites RT2 2 + publicationRelationType IsSupplementTo RT3 3 + publicationRelationType IsSupplementedBy RT4 4 + publicationRelationType IsReferencedBy RT5 5 + publicationRelationType References RT6 6 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 1313f3415ab..53ab6c7bef7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -40,6 +40,7 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String note = "note"; + public final static String publicationRelationType = "publicationRelationType"; public final static String publicationCitation = "publicationCitation"; public final static String publicationIDType = "publicationIDType"; public final static String publicationIDNumber = "publicationIDNumber"; diff --git a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java index 6b7cb844f3e..e74a2f26af6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java @@ -28,6 +28,8 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; + /** * * @author 
skraffmi
@@ -301,26 +303,35 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO
         String IDType = "";
         String IDNo = "";
         String url = "";
+        String relationType = null;
         for (Iterator iterator = foo.iterator(); iterator.hasNext();) {
             FieldDTO next = iterator.next();
-            if (DatasetFieldConstant.publicationCitation.equals(next.getTypeName())) {
-                citation = next.getSinglePrimitive();
-            }
-            if (DatasetFieldConstant.publicationIDType.equals(next.getTypeName())) {
-                IDType = next.getSinglePrimitive();
-            }
-            if (DatasetFieldConstant.publicationIDNumber.equals(next.getTypeName())) {
-                IDNo = next.getSinglePrimitive();
-            }
-            if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) {
-                url = next.getSinglePrimitive();
+            switch (next.getTypeName()) {
+                case DatasetFieldConstant.publicationCitation:
+                    citation = next.getSinglePrimitive();
+                    break;
+                case DatasetFieldConstant.publicationIDType:
+                    IDType = next.getSinglePrimitive();
+                    break;
+                case DatasetFieldConstant.publicationIDNumber:
+                    IDNo = next.getSinglePrimitive();
+                    break;
+                case DatasetFieldConstant.publicationURL:
+                    url = next.getSinglePrimitive();
+                    break;
+                case DatasetFieldConstant.publicationRelationType:
+                    relationType = next.getSinglePrimitive();
+                    break;
             }
         }
+        if (StringUtils.isBlank(relationType)) {
+            relationType = "isReferencedBy";
+        }
         pubString = appendCommaSeparatedValue(citation, IDType);
         pubString = appendCommaSeparatedValue(pubString, IDNo);
         pubString = appendCommaSeparatedValue(pubString, url);
         if (!pubString.isEmpty()){
-            xmlw.writeStartElement(dcFlavor+":"+"isReferencedBy");
+            xmlw.writeStartElement(dcFlavor+":" + relationType);
             xmlw.writeCharacters(pubString);
             xmlw.writeEndElement(); //relPubl
         }
     }
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java
index b4b5e597365..dd01750942d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java
@@ -932,6 +932,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe
         String relatedIdentifierType = null;
         String relatedIdentifier = null; // is used when relatedIdentifierType variable is not URL
         String relatedURL = null; // is used when relatedIdentifierType variable is URL
+        String relationType = null; // the nature of the relationship to the related publication
 
         for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) {
             FieldDTO next = iterator.next();
@@ -944,6 +945,9 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe
             if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) {
                 relatedURL = next.getSinglePrimitive();
             }
+            if (DatasetFieldConstant.publicationRelationType.equals(next.getTypeName())) {
+                relationType = next.getSinglePrimitive();
+            }
         }
 
         if (StringUtils.isNotBlank(relatedIdentifierType)) {
@@ -956,7 +960,10 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe
             }
 
             relatedIdentifier_map.put("relatedIdentifierType", relatedIdentifierType);
-            relatedIdentifier_map.put("relationType", "IsCitedBy");
+            if (relationType == null) {
+                relationType = "IsCitedBy";
+            }
+            relatedIdentifier_map.put("relationType", relationType);
 
             if (StringUtils.containsIgnoreCase(relatedIdentifierType, "url")) {
                 writeFullElement(xmlw, null, "relatedIdentifier", relatedIdentifier_map, relatedURL, language);
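Both exporter changes above follow the same pattern: prefer the relation type entered in the dataset, and fall back to an exporter-specific default ("isReferencedBy" for Dublin Core, "IsCitedBy" for OpenAIRE/DataCite) only when the field is blank. A minimal, self-contained sketch of that shared logic; the class and method names below are illustrative, not part of the patch:

    // Sketch only; RelationTypeFallback and resolve() are illustrative names.
    public class RelationTypeFallback {
        static String resolve(String enteredRelationType, String exporterDefault) {
            // Blank or missing metadata falls back to the exporter's default term.
            if (enteredRelationType == null || enteredRelationType.trim().isEmpty()) {
                return exporterDefault;
            }
            return enteredRelationType;
        }

        public static void main(String[] args) {
            System.out.println(resolve(null, "IsCitedBy"));             // IsCitedBy
            System.out.println(resolve("IsSupplementTo", "IsCitedBy")); // IsSupplementTo
        }
    }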
diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties
index cb864eb78e9..a52a599cff3 100644
--- a/src/main/java/propertyFiles/citation.properties
+++ b/src/main/java/propertyFiles/citation.properties
@@ -31,6 +31,7 @@ datasetfieldtype.topicClassValue.title=Term
 datasetfieldtype.topicClassVocab.title=Controlled Vocabulary Name
 datasetfieldtype.topicClassVocabURI.title=Controlled Vocabulary URL
 datasetfieldtype.publication.title=Related Publication
+datasetfieldtype.publicationRelationType.title=Relation Type
 datasetfieldtype.publicationCitation.title=Citation
 datasetfieldtype.publicationIDType.title=Identifier Type
 datasetfieldtype.publicationIDNumber.title=Identifier
@@ -110,6 +111,7 @@ datasetfieldtype.topicClassValue.description=A topic or subject term
 datasetfieldtype.topicClassVocab.description=The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH)
 datasetfieldtype.topicClassVocabURI.description=The URL where one can access information about the term's controlled vocabulary
 datasetfieldtype.publication.description=The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab
+datasetfieldtype.publicationRelationType.description=The nature of the relationship between this Dataset and the related publication
 datasetfieldtype.publicationCitation.description=The full bibliographic citation for the related publication
 datasetfieldtype.publicationIDType.description=The type of identifier that uniquely identifies a related publication
 datasetfieldtype.publicationIDNumber.description=The identifier for a related publication
diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java
index 2da15147255..8350c5d9875 100644
--- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java
@@ -617,7 +617,7 @@ public void testWriteRelatedIdentifierElement() throws XMLStreamException, IOExc
 
         //then
         assertEquals("<relatedIdentifiers>"
-                + "<relatedIdentifier relatedIdentifierType=\"ark\" relationType=\"IsCitedBy\">"
+                + "<relatedIdentifier relatedIdentifierType=\"ark\" relationType=\"IsSupplementTo\">"
                 + "RelatedPublicationIDNumber1"
                 + "</relatedIdentifier><relatedIdentifier relatedIdentifierType=\"arXiv\" relationType=\"IsCitedBy\">"
                 + "RelatedPublicationIDNumber2"

From 93faadeb7aaa8093369d327cc0394c49ee090850 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Fri, 14 Jun 2024 13:24:58 -0400
Subject: [PATCH 137/622] missing element for openaireutil test

---
 .../harvard/iq/dataverse/export/dataset-all-defaults.txt   | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt
index 431f069cb03..6b3c7689bbf 100644
--- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt
+++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt
@@ -324,6 +324,12 @@
             "typeClass": "compound",
             "value": [
                 {
+                    "publicationRelationType": {
+                        "typeName": "publicationRelationType",
+                        "multiple": false,
+                        "typeClass": "controlledVocabulary",
+                        "value": "IsSupplementTo"
+                    },
                     "publicationCitation": {
                         "typeName": "publicationCitation",
                         "multiple": false,
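For reviewers tracing the assertion in the test change above: a minimal, runnable sketch of producing one relatedIdentifier element with javax.xml.stream. The element and attribute names match what OpenAireExportUtil writes; the harness class itself is illustrative, not project code.

    import java.io.StringWriter;
    import javax.xml.stream.XMLOutputFactory;
    import javax.xml.stream.XMLStreamWriter;

    // Illustrative sketch only: emits the shape of element the updated test expects.
    public class RelatedIdentifierSketch {
        public static void main(String[] args) throws Exception {
            StringWriter out = new StringWriter();
            XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
            xmlw.writeStartElement("relatedIdentifier");
            xmlw.writeAttribute("relatedIdentifierType", "ark");
            // "IsCitedBy" is the fallback when the dataset supplies no relation type.
            xmlw.writeAttribute("relationType", "IsSupplementTo");
            xmlw.writeCharacters("RelatedPublicationIDNumber1");
            xmlw.writeEndElement();
            xmlw.flush();
            System.out.println(out); // <relatedIdentifier ...>RelatedPublicationIDNumber1</relatedIdentifier>
        }
    }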
From c9084e3058045fb54fd960a203588f371ce59b2c Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Fri, 14 Jun 2024 13:57:02 -0400
Subject: [PATCH 138/622] contributor type null fix

---
 .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
index 9ba1e4e3116..92bcf8b481f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
@@ -546,7 +546,10 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X
                         contributor = subField.getValue();
                         break;
                     case DatasetFieldConstant.contributorType:
-                        contributorType = subField.getValue().replace(" ", "");
+                        contributorType = subField.getValue();
+                        if (contributorType != null) {
+                            contributorType = contributorType.replace(" ", "");
+                        }
                         break;
                     }
                 }

From cdd6d6fb4357fe2e63dbf597faf57d59c8625670 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Fri, 14 Jun 2024 16:05:40 -0400
Subject: [PATCH 139/622] add relationType to base code and DataCite XML

---
 .../iq/dataverse/DatasetRelPublication.java   | 29 ++++++++++-----
 .../harvard/iq/dataverse/DatasetVersion.java  | 37 +++++++++++++------
 .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++--
 3 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java
index 7680ebc16db..a0696ab38d9 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java
@@ -6,7 +6,6 @@
 package edu.harvard.iq.dataverse;
 
-
 /**
  *
  * @author skraffmiller
@@ -25,10 +24,12 @@ public class DatasetRelPublication {
     private String description;
     private boolean replicationData;
     private int displayOrder;
+    private String relationType;
 
     public int getDisplayOrder() {
         return displayOrder;
     }
+
     public void setDisplayOrder(int displayOrder) {
         this.displayOrder = displayOrder;
     }
@@ -64,8 +65,7 @@ public String getUrl() {
     public void setUrl(String url) {
         this.url = url;
     }
-
-
+
     public String getTitle() {
         return title;
     }
@@ -82,12 +82,21 @@ public void setDescription(String description) {
         this.description = description;
     }
 
-    public boolean isEmpty() {
-        return ((text==null || text.trim().equals(""))
-                && (!replicationData)
-                && (idType==null || idType.trim().equals(""))
-                && (idNumber==null || idNumber.trim().equals(""))
-                && (url==null || url.trim().equals("")));
-    }
+    public void setRelationType(String type) {
+        relationType = type;
+
+    }
+
+    public String getRelationType() {
+        return relationType;
+    }
+
+    public boolean isEmpty() {
+        return ((text == null || text.trim().equals(""))
+                && (!replicationData)
+                && (idType == null || idType.trim().equals(""))
+                && (idNumber == null || idNumber.trim().equals(""))
+                && (url == null || url.trim().equals("")));
+    }
 }
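Before the DatasetVersion changes that follow, a small usage sketch of the new accessors on DatasetRelPublication. Only the class and its setRelationType/getRelationType methods come from the patch; the wrapper below is illustrative and assumes the patched class is on the classpath.

    import edu.harvard.iq.dataverse.DatasetRelPublication;

    // Sketch only; RelationTypeRoundTrip is an illustrative name.
    public class RelationTypeRoundTrip {
        public static void main(String[] args) {
            DatasetRelPublication pub = new DatasetRelPublication();
            pub.setRelationType("IsSupplementedBy"); // one of the six controlled terms added to citation.tsv
            String relationType = pub.getRelationType();
            if (relationType == null || relationType.trim().isEmpty()) {
                relationType = "IsSupplementTo"; // the DataCite fallback used in XmlMetadataTemplate
            }
            System.out.println(relationType); // IsSupplementedBy
        }
    }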
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
index 6648419216d..eb6fdd4e923 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -1356,24 +1356,37 @@ public List<DatasetRelPublication> getRelatedPublications() {
             for (DatasetFieldCompoundValue publication : dsf.getDatasetFieldCompoundValues()) {
                 DatasetRelPublication relatedPublication = new DatasetRelPublication();
                 for (DatasetField subField : publication.getChildDatasetFields()) {
-                    if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) {
-                        String citation = subField.getDisplayValue();
-                        relatedPublication.setText(citation);
-                    }
-
-
-                    if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) {
-                        // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType
-                        // for this url metadata field is likely set up so that the display value is automatically
-                        // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output.
-                        // So we want to use the raw value of the field instead, with
-                        // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()).
+                    switch (subField.getDatasetFieldType().getName()) {
+                    case DatasetFieldConstant.publicationCitation:
+                        relatedPublication.setText(subField.getDisplayValue());
+                        break;
+                    case DatasetFieldConstant.publicationURL:
+                        // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType
+                        // for this url metadata field is likely set up so that the display value is automatically
+                        // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org
+                        // JSON-LD output. So we want to use the raw value of the field instead, with minimal HTML
+                        // sanitation, just in case (this would be done on all URLs in getDisplayValue()).
                         String url = subField.getValue();
                         if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) {
                             relatedPublication.setUrl("");
                         } else {
                             relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url));
                         }
+                        break;
+                    case DatasetFieldConstant.publicationIDType:
+                        // QDR idType has a trailing : now (Aug 2021)
+                        // Get sanitized value without any display modifications
+                        subField.getDatasetFieldType().setDisplayFormat("#VALUE");
+                        relatedPublication.setIdType(subField.getDisplayValue());
+                        break;
+                    case DatasetFieldConstant.publicationIDNumber:
+                        // Get sanitized value without any display modifications
+                        subField.getDatasetFieldType().setDisplayFormat("#VALUE");
+                        relatedPublication.setIdNumber(subField.getDisplayValue());
+                        break;
+                    case DatasetFieldConstant.publicationRelationType:
+                        relatedPublication.setRelationType(subField.getDisplayValue());
+                        break;
                     }
                 }
                 relatedPublications.add(relatedPublication);
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
index 92bcf8b481f..03d4de99691 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
@@ -4,6 +4,8 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.text.ParseException;
 import java.util.ArrayList;
@@ -872,6 +874,10 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th
                 String pubIdType = relatedPub.getIdType();
                 String identifier = relatedPub.getIdNumber();
                 String url = relatedPub.getUrl();
+                String relationType = relatedPub.getRelationType();
+                if(StringUtils.isBlank(relationType)) {
+                    relationType = "IsSupplementTo";
+                }
                 /*
                  * Note - with identifier and url fields, it's not clear that there's a single
                  * way those two fields are used for all identifier types. The code here is
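With the blank-value fallback above, every related publication reaches DataCite with an explicit relationType, defaulting to IsSupplementTo, which was previously hard-coded for all entries (see the next hunk). A self-contained sketch of the element shape this produces; the DOI and attribute values are sample data, not taken from the patch:

    import java.io.StringWriter;
    import javax.xml.stream.XMLOutputFactory;
    import javax.xml.stream.XMLStreamException;
    import javax.xml.stream.XMLStreamWriter;

    public class RelatedIdentifierSketch {
        public static void main(String[] args) throws XMLStreamException {
            StringWriter out = new StringWriter();
            XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(out);

            // Shape of the DataCite element the template now emits: the relationType
            // attribute carries the publication's own value, falling back to IsSupplementTo.
            xmlw.writeStartElement("relatedIdentifiers");
            xmlw.writeStartElement("relatedIdentifier");
            xmlw.writeAttribute("relatedIdentifierType", "DOI");   // sample value
            xmlw.writeAttribute("relationType", "IsSupplementTo"); // the default
            xmlw.writeCharacters("10.5072/FK2/EXAMPLE");           // sample identifier
            xmlw.writeEndElement();
            xmlw.writeEndElement();
            xmlw.close();

            // Prints (one line): <relatedIdentifiers><relatedIdentifier
            //   relatedIdentifierType="DOI" relationType="IsSupplementTo">
            //   10.5072/FK2/EXAMPLE</relatedIdentifier></relatedIdentifiers>
            System.out.println(out);
        }
    }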
The code here is @@ -921,13 +927,13 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th // For non-URL types, if a URL is given, split the string to get a schemeUri try { - URL relatedUrl = new URL(relatedIdentifier); + URL relatedUrl = new URI(relatedIdentifier).toURL(); String protocol = relatedUrl.getProtocol(); String authority = relatedUrl.getAuthority(); String site = String.format("%s://%s", protocol, authority); relatedIdentifier = relatedIdentifier.substring(site.length()); attributes.put("schemeURI", site); - } catch (MalformedURLException e) { + } catch (URISyntaxException | MalformedURLException e) { // Just an identifier } } @@ -937,7 +943,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { attributes.put("relatedIdentifierType", pubIdType); } - attributes.put("relationType", "IsSupplementTo"); + attributes.put("relationType", relationType); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); } From 360d3fac049239cfc4f41b6be83b8ecc0b16b475 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 16:05:57 -0400 Subject: [PATCH 140/622] add relationType to above fold display --- .../iq/dataverse/DatasetVersionUI.java | 32 +++++++++++++------ src/main/webapp/dataset.xhtml | 6 ++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java index 55b98c178bb..975de391d8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java @@ -62,14 +62,14 @@ public void setMetadataBlocksForEdit(TreeMap> public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boolean createBlanks) { /*takes in the values of a dataset version and apportions them into lists for - viewing and editng in the dataset page. + viewing and editing in the dataset page. */ setDatasetVersion(datasetVersion); //this.setDatasetAuthors(new ArrayList()); this.setDatasetRelPublications(new ArrayList<>()); - // loop through vaues to get fields for view mode + // loop through values to get fields for view mode for (DatasetField dsf : datasetVersion.getDatasetFields()) { //Special Handling for various fields displayed above tabs in dataset page view. 
From 360d3fac049239cfc4f41b6be83b8ecc0b16b475 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Fri, 14 Jun 2024 16:05:57 -0400
Subject: [PATCH 140/622] add relationType to above fold display

---
 .../iq/dataverse/DatasetVersionUI.java | 32 +++++++++++++------
 src/main/webapp/dataset.xhtml          |  6 ++--
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java
index 55b98c178bb..975de391d8c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java
@@ -62,14 +62,14 @@ public void setMetadataBlocksForEdit(TreeMap<MetadataBlock, List<DatasetField>>
     public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boolean createBlanks) {
         /*takes in the values of a dataset version and apportions them into lists for
-        viewing and editng in the dataset page.
+        viewing and editing in the dataset page.
         */
 
         setDatasetVersion(datasetVersion);
         //this.setDatasetAuthors(new ArrayList());
         this.setDatasetRelPublications(new ArrayList<>());
 
-        // loop through vaues to get fields for view mode
+        // loop through values to get fields for view mode
         for (DatasetField dsf : datasetVersion.getDatasetFields()) {
             //Special Handling for various fields displayed above tabs in dataset page view.
             if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.title)) {
@@ -114,17 +114,23 @@ public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boo
                     datasetRelPublication.setTitle(dsf.getDatasetFieldType().getLocaleTitle());
                     datasetRelPublication.setDescription(dsf.getDatasetFieldType().getLocaleDescription());
                     for (DatasetField subField : relPubVal.getChildDatasetFields()) {
-                        if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) {
+                        String value = subField.getValue();
+                        switch (subField.getDatasetFieldType().getName()) {
+                        case DatasetFieldConstant.publicationCitation:
                             datasetRelPublication.setText(subField.getValue());
-                        }
-                        if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDNumber)) {
+                            break;
+                        case DatasetFieldConstant.publicationIDNumber:
                             datasetRelPublication.setIdNumber(subField.getValue());
-                        }
-                        if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDType)) {
+                            break;
+                        case DatasetFieldConstant.publicationIDType:
                             datasetRelPublication.setIdType(subField.getValue());
-                        }
-                        if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) {
+                            break;
+                        case DatasetFieldConstant.publicationURL:
                             datasetRelPublication.setUrl(subField.getValue());
+                            break;
+                        case DatasetFieldConstant.publicationRelationType:
+                            datasetRelPublication.setRelationType(subField.getValue());
+                            break;
                         }
                     }
                     this.getDatasetRelPublications().add(datasetRelPublication);
@@ -263,6 +269,14 @@ public String getRelPublicationUrl() {
         }
     }
 
+    public String getRelPublicationRelationType() {
+        if (!this.datasetRelPublications.isEmpty()) {
+            return this.getDatasetRelPublications().get(0).getRelationType();
+        } else {
+            return "";
+        }
+    }
+
     public String getUNF() {
         //todo get UNF to calculate and display here.
         return "";
diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml
index 13faf9d7f20..4fd91f24c36 100644
--- a/src/main/webapp/dataset.xhtml
+++ b/src/main/webapp/dataset.xhtml
@@ -631,6 +631,7 @@
                                 or !empty DatasetPage.datasetVersionUI.keywordDisplay
                                 or !empty DatasetPage.datasetVersionUI.subject.value
                                 or !empty DatasetPage.datasetVersionUI.relPublicationCitation
+                                or !empty DatasetPage.datasetVersionUI.relPublicationUrl
                                 or !empty DatasetPage.datasetVersionUI.notes.value)
                                 and !empty DatasetPage.datasetSummaryFields}">
@@ -650,8 +651,9 @@
                                 data-toggle="tooltip" data-placement="auto right"
                                 data-original-title="#{DatasetPage.datasetVersionUI.datasetRelPublications.get(0).description}">
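Taken together, these changes surface the first related publication's relation type above the fold, with getRelPublicationRelationType() falling back to an empty string when no publications are present. (The added String value local in the switch above is not yet referenced; the cases still call subField.getValue() directly.) A JUnit 5 sketch of the accessor's two paths, illustrative only and assuming the bean's no-arg constructor and the setDatasetRelPublications() setter used in initDatasetVersionUI():

    import static org.junit.jupiter.api.Assertions.assertEquals;

    import java.util.ArrayList;
    import java.util.List;

    import org.junit.jupiter.api.Test;

    import edu.harvard.iq.dataverse.DatasetRelPublication;
    import edu.harvard.iq.dataverse.DatasetVersionUI;

    class RelPublicationRelationTypeDisplaySketch {

        @Test
        void firstPublicationDrivesAboveFoldDisplay() {
            DatasetRelPublication pub = new DatasetRelPublication();
            pub.setRelationType("IsSupplementTo");
            List<DatasetRelPublication> pubs = new ArrayList<>();
            pubs.add(pub);

            DatasetVersionUI versionUI = new DatasetVersionUI();
            versionUI.setDatasetRelPublications(pubs);
            // Only the first publication's relation type is shown above the fold.
            assertEquals("IsSupplementTo", versionUI.getRelPublicationRelationType());
        }

        @Test
        void emptyListFallsBackToEmptyString() {
            DatasetVersionUI versionUI = new DatasetVersionUI();
            versionUI.setDatasetRelPublications(new ArrayList<>());
            assertEquals("", versionUI.getRelPublicationRelationType());
        }
    }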