From 2d884ca8532d9e4304d098c5e22b6e16b5532a91 Mon Sep 17 00:00:00 2001 From: stevenferey Date: Wed, 6 Nov 2024 17:10:58 +0100 Subject: [PATCH 1/2] Header id support for harvesting --- .../api/imports/ImportGenericServiceBean.java | 34 +- .../api/imports/ImportServiceBean.java | 2 +- .../imports/ImportGenericServiceBeanTest.java | 58 +++- .../json/JsonImportGenericWithOtherId.txt | 307 ++++++++++++++++++ .../json/JsonImportGenericWithoutOtherId.txt | 258 +++++++++++++++ 5 files changed, 652 insertions(+), 7 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index d32a548c8bf..bc7e9138021 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -11,7 +11,8 @@ import edu.harvard.iq.dataverse.ForeignMetadataFieldMapping; import edu.harvard.iq.dataverse.ForeignMetadataFormatMapping; import edu.harvard.iq.dataverse.MetadataBlockServiceBean; -import edu.harvard.iq.dataverse.api.dto.*; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; +import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean; @@ -29,7 +30,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.*; -import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.EJB; @@ -155,7 +155,14 @@ public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping // It is distributed as required content, in reference_data.sql. // Note that arbitrary formatting tags are supported for the outer xml // wrapper. -- L.A. 4.5 - public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException { + /** + * + * @param DcXmlToParse Metadata contained in the tag + * @param harvestIdentifier Header harvesting id + * @return datasetDTO with metadata filled in + * @throws XMLStreamException + */ + public DatasetDTO processOAIDCxml(String DcXmlToParse, String harvestIdentifier) throws XMLStreamException { // look up DC metadata mapping: ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS); @@ -189,7 +196,7 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException // as an "other id". In the context of OAI harvesting, we expect // the identifier to be a global id, so we need to rearrange that: - String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion()); + String identifier = getIdentifierHarvestableByDataverse(datasetDTO.getDatasetVersion(), harvestIdentifier); logger.fine("Imported identifier: "+identifier); String globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO); @@ -335,7 +342,13 @@ private FieldDTO makeDTO(DatasetFieldType dataverseFieldType, FieldDTO value, St return value; } - private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { + /** + * + * @param datasetVersionDTO + * @param harvestIdentifier Header harvesting id + * @return + */ + public String getIdentifierHarvestableByDataverse(DatasetVersionDTO datasetVersionDTO, String harvestIdentifier) { List otherIds = new ArrayList<>(); for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); @@ -354,6 +367,12 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { } } } + + // The identifier is possibly declared only in the header, so we add it to the list + if (harvestIdentifier != null) { + otherIds.add(harvestIdentifier); + } + if (!otherIds.isEmpty()) { // We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F" for (String otherId : otherIds) { @@ -384,6 +403,11 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { //ToDo - sync with GlobalId.parsePersistentId(String) ? - that currently doesn't do URL forms, but could public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO datasetDTO) { + if (identifierString == null) { + logger.warning("Error parsing identifier: is null"); + return null; + } + int index1 = identifierString.indexOf(':'); int index2 = identifierString.indexOf('/'); if (index1==-1) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index b203738a9fd..a3a15f4fed2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -245,7 +245,7 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve logger.fine("importing DC "+metadataFile.getAbsolutePath()); try { String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath())); - dsDTO = importGenericService.processOAIDCxml(xmlToParse); + dsDTO = importGenericService.processOAIDCxml(xmlToParse, harvestIdentifier); } catch (IOException | XMLStreamException e) { throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")"); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java index 44739f3f62a..a4a4ffa5dca 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java @@ -1,6 +1,12 @@ package edu.harvard.iq.dataverse.api.imports; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; + +import org.apache.commons.io.FileUtils; +import com.google.gson.Gson; +import java.io.File; + import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -8,6 +14,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import java.nio.charset.StandardCharsets; + @ExtendWith(MockitoExtension.class) public class ImportGenericServiceBeanTest { @@ -15,7 +23,55 @@ public class ImportGenericServiceBeanTest { private ImportGenericServiceBean importGenericService; @Test - public void testReassignIdentifierAsGlobalId() { + void testIdentifierHarvestableWithOtherID() { + + try { + // "otherIdValue" containing the value : doi:10.7910/DVN/TJCLKP + File file = new File("src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + // junk or null + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "junk")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, null)); + + } catch (Exception e) { + e.printStackTrace(); + } + + } + + @Test + void testIdentifierHarvestableWithoutOtherID() { + try { + // Does not contain data of type "otherIdValue" + File file = new File("src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + + // non-URL + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "doi:10.7910/DVN/TJCLKP")); + assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "hdl:10.7910/DVN/TJCLKP")); + // HTTPS + assertEquals("https://doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://hdl.handle.net/10.7910/DVN/TJCLKP")); + // HTTP (no S) + assertEquals("http://doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://hdl.handle.net/10.7910/DVN/TJCLKP")); + // junk or null + assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, "junk")); + assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, null)); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + @Test + void testReassignIdentifierAsGlobalId() { // non-URL assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("doi:10.7910/DVN/TJCLKP", new DatasetDTO())); assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("hdl:10.7910/DVN/TJCLKP", new DatasetDTO())); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt new file mode 100644 index 00000000000..af9241393e9 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt @@ -0,0 +1,307 @@ +{ + "UNF": "UNF", + "createTime": "2014-11-12 12:17:55 -05", + "distributionDate": "Distribution Date", + "id": 2, + "lastUpdateTime": "2014-11-12 12:20:32 -05", + "metadataBlocks": { + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "astroType", + "value": [ + "Image", + "Mosaic", + "EventList" + ] + } + ] + }, + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My Dataset" + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Top" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "ellenid" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "ORCID" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Privileged, Pete" + } + }, + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Bottom" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "audreyId" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "DAISY" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Awesome, Audrey" + } + } + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "datasetContact", + "value": [ + "pete@malinator.com" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescription", + "value": "Here is my description" + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": [ + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management" + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "keyword", + "value": [ + "keyword1", + "keyword2" + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "otherId", + "value": [ + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "my agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "otherId" + } + }, + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "another agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "otherId2" + } + }, + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "another agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "doi:10.7910/DVN/TJCLKP" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "depositor", + "value": "Ellen K" + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dateOfDeposit", + "value": "2014-11-12" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "geographicCoverage", + "value": [ + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "Arlington" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "United States" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "MA" + } + }, + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "beachcity" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "Aruba" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "beach" + } + } + ] + }, + { + "multiple": false, + "typeClass": "compound", + "typeName": "geographicBoundingBox", + "value": + { + "eastLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "eastLongitude", + "value": "23" + }, + "northLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "northLatitude", + "value": "786" + }, + "southLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "southLatitude", + "value": "34" + }, + "westLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "westLongitude", + "value": "45" + } + } + + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "software", + "value": [ + { + "softwareName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareName", + "value": "softwareName" + }, + "softwareVersion": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareVersion", + "value": "software version" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "unitOfAnalysis", + "value": "unit of analysis" + } + ] + } + }, + "productionDate": "Production Date", + "versionState": "DRAFT" + } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt new file mode 100644 index 00000000000..ceb2263c2cf --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt @@ -0,0 +1,258 @@ +{ + "UNF": "UNF", + "createTime": "2014-11-12 12:17:55 -05", + "distributionDate": "Distribution Date", + "id": 2, + "lastUpdateTime": "2014-11-12 12:20:32 -05", + "metadataBlocks": { + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "astroType", + "value": [ + "Image", + "Mosaic", + "EventList" + ] + } + ] + }, + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My Dataset" + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Top" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "ellenid" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "ORCID" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Privileged, Pete" + } + }, + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Bottom" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "audreyId" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "DAISY" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Awesome, Audrey" + } + } + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "datasetContact", + "value": [ + "pete@malinator.com" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescription", + "value": "Here is my description" + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": [ + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management" + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "keyword", + "value": [ + "keyword1", + "keyword2" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "depositor", + "value": "Ellen K" + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dateOfDeposit", + "value": "2014-11-12" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "geographicCoverage", + "value": [ + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "Arlington" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "United States" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "MA" + } + }, + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "beachcity" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "Aruba" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "beach" + } + } + ] + }, + { + "multiple": false, + "typeClass": "compound", + "typeName": "geographicBoundingBox", + "value": + { + "eastLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "eastLongitude", + "value": "23" + }, + "northLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "northLatitude", + "value": "786" + }, + "southLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "southLatitude", + "value": "34" + }, + "westLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "westLongitude", + "value": "45" + } + } + + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "software", + "value": [ + { + "softwareName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareName", + "value": "softwareName" + }, + "softwareVersion": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareVersion", + "value": "software version" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "unitOfAnalysis", + "value": "unit of analysis" + } + ] + } + }, + "productionDate": "Production Date", + "versionState": "DRAFT" + } From 7519acc4b9af391f44a334cb3508ed6ecb8b00b7 Mon Sep 17 00:00:00 2001 From: stevenferey Date: Fri, 8 Nov 2024 17:43:55 +0100 Subject: [PATCH 2/2] Adaptation of unit tests --- .../imports/ImportGenericServiceBeanTest.java | 75 +++++++++---------- .../json/importGenericWithOtherId.json} | 0 .../json/importGenericWithoutOtherId.json} | 0 3 files changed, 35 insertions(+), 40 deletions(-) rename src/test/{java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt => resources/json/importGenericWithOtherId.json} (100%) rename src/test/{java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt => resources/json/importGenericWithoutOtherId.json} (100%) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java index a4a4ffa5dca..ac17a5981dc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java @@ -6,6 +6,7 @@ import org.apache.commons.io.FileUtils; import com.google.gson.Gson; import java.io.File; +import java.io.IOException; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -23,51 +24,43 @@ public class ImportGenericServiceBeanTest { private ImportGenericServiceBean importGenericService; @Test - void testIdentifierHarvestableWithOtherID() { - - try { - // "otherIdValue" containing the value : doi:10.7910/DVN/TJCLKP - File file = new File("src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt"); - String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); - DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + void testIdentifierHarvestableWithOtherID() throws IOException { + // "otherIdValue" containing the value : doi:10.7910/DVN/TJCLKP + File file = new File("src/test/resources/json/importGenericWithOtherId.json"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); - assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://doi.org/10.7910/DVN/TJCLKP")); - // junk or null - assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "junk")); - assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, null)); - - } catch (Exception e) { - e.printStackTrace(); - } - + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + // junk or null + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "junk")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, null)); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://www.example.com")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://dataverse.org")); } @Test - void testIdentifierHarvestableWithoutOtherID() { - try { - // Does not contain data of type "otherIdValue" - File file = new File("src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt"); - String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); - DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + void testIdentifierHarvestableWithoutOtherID() throws IOException { + // Does not contain data of type "otherIdValue" + File file = new File("src/test/resources/json/importGenericWithoutOtherId.json"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); - // non-URL - assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "doi:10.7910/DVN/TJCLKP")); - assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "hdl:10.7910/DVN/TJCLKP")); - // HTTPS - assertEquals("https://doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://doi.org/10.7910/DVN/TJCLKP")); - assertEquals("https://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://dx.doi.org/10.7910/DVN/TJCLKP")); - assertEquals("https://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://hdl.handle.net/10.7910/DVN/TJCLKP")); - // HTTP (no S) - assertEquals("http://doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://doi.org/10.7910/DVN/TJCLKP")); - assertEquals("http://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://dx.doi.org/10.7910/DVN/TJCLKP")); - assertEquals("http://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://hdl.handle.net/10.7910/DVN/TJCLKP")); - // junk or null - assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, "junk")); - assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, null)); - - } catch (Exception e) { - e.printStackTrace(); - } + // non-URL + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "doi:10.7910/DVN/TJCLKP")); + assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "hdl:10.7910/DVN/TJCLKP")); + // HTTPS + assertEquals("https://doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "https://hdl.handle.net/10.7910/DVN/TJCLKP")); + // HTTP (no S) + assertEquals("http://doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.getIdentifierHarvestableByDataverse(dto, "http://hdl.handle.net/10.7910/DVN/TJCLKP")); + // junk or null + assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, "junk")); + assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, null)); + assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, "http://www.example.com")); + assertNull(importGenericService.getIdentifierHarvestableByDataverse(dto, "https://dataverse.org")); } @Test @@ -85,6 +78,8 @@ void testReassignIdentifierAsGlobalId() { assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("http://hdl.handle.net/10.7910/DVN/TJCLKP", new DatasetDTO())); // junk assertNull(importGenericService.reassignIdentifierAsGlobalId("junk", new DatasetDTO())); + assertNull(importGenericService.reassignIdentifierAsGlobalId("http://www.example.com", new DatasetDTO())); + assertNull(importGenericService.reassignIdentifierAsGlobalId("https://dataverse.org", new DatasetDTO())); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt b/src/test/resources/json/importGenericWithOtherId.json similarity index 100% rename from src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithOtherId.txt rename to src/test/resources/json/importGenericWithOtherId.json diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt b/src/test/resources/json/importGenericWithoutOtherId.json similarity index 100% rename from src/test/java/edu/harvard/iq/dataverse/util/json/JsonImportGenericWithoutOtherId.txt rename to src/test/resources/json/importGenericWithoutOtherId.json