Skip to content

Commit

Permalink
An automated test of an actual harvest (#8843)
Browse files Browse the repository at this point in the history
  • Loading branch information
landreev committed Dec 12, 2022
1 parent 9cbfa31 commit 395d605
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 44 deletions.
31 changes: 2 additions & 29 deletions src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,13 +373,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
}

if (authenticatedUser == null || !authenticatedUser.isSuperuser()) {
return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs");
return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs");
}

HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname);

if (harvestingClient == null) {
return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname);
return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname);
}

DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser);
Expand All @@ -391,35 +391,8 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
return this.accepted();
}

// This GET shows the status of the harvesting run in progress for this
// client, if present:
// @GET
// @Path("{nickName}/run")
// TODO:

// This DELETE kills the harvesting run in progress for this client,
// if present:
// @DELETE
// @Path("{nickName}/run")
// TODO:





/* Auxiliary, helper methods: */

/*
@Deprecated
public static JsonArrayBuilder harvestingConfigsAsJsonArray(List<Dataverse> harvestingDataverses) {
JsonArrayBuilder hdArr = Json.createArrayBuilder();
for (Dataverse hd : harvestingDataverses) {
hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig()));
}
return hdArr;
}*/

public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) {
if (harvestingConfig == null) {
return null;
Expand Down
169 changes: 154 additions & 15 deletions src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java
Original file line number Diff line number Diff line change
@@ -1,48 +1,81 @@
package edu.harvard.iq.dataverse.api;

import java.util.logging.Logger;
import java.util.logging.Level;
import com.jayway.restassured.RestAssured;
import static com.jayway.restassured.RestAssured.given;
import org.junit.Test;
import com.jayway.restassured.response.Response;
import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
import static javax.ws.rs.core.Response.Status.ACCEPTED;
import static javax.ws.rs.core.Response.Status.OK;
import static org.hamcrest.CoreMatchers.equalTo;
import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;

/**
* extremely minimal (for now) API tests for creating OAI clients.
* This class tests Harvesting Client functionality.
* Note that these methods test BOTH the proprietary Dataverse REST API for
* creating and managing harvesting clients, AND the underlying OAI-PMH
* harvesting functionality itself. I.e., we will use the Dataverse
* /api/harvest/clients/ API to run an actual harvest of a control set and
* then validate the resulting harvested content.
*/
public class HarvestingClientsIT {

private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName());

private static final String harvestClientsApi = "/api/harvest/clients/";
private static final String harvestCollection = "root";
private static final String rootCollection = "root";
private static final String harvestUrl = "https://demo.dataverse.org/oai";
private static final String archiveUrl = "https://demo.dataverse.org";
private static final String harvestMetadataFormat = "oai_dc";
private static final String archiveDescription = "RestAssured harvesting client test";
private static final String controlOaiSet = "controlTestSet";
private static final int datasetsInControlSet = 7;
private static String normalUserAPIKey;
private static String adminUserAPIKey;
private static String harvestCollectionAlias;

/**
 * One-time fixture for this test class: sets the RestAssured base URI from
 * UtilIT.getRestAssuredBaseUri(), then creates the shared users and the
 * collection that harvested content goes into.
 */
@BeforeClass
public static void setUpClass() {
RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();

// Create the users, an admin and a non-admin:
setupUsers();

// Create a collection that we will use to harvest remote content into:
// (must run after setupUsers(): the collection is created with the admin API key)
setupCollection();

}

private void setupUsers() {
/**
 * Creates the two accounts shared by the tests in this class: a regular
 * user and a user that is promoted to superuser ("admin"). Their API tokens
 * are stored in {@code normalUserAPIKey} and {@code adminUserAPIKey}.
 *
 * Every step is verified so that a broken fixture fails here, with a clear
 * cause, instead of surfacing later as an unexpected status code in a test.
 * NOTE(review): assumes the UtilIT helpers answer 200 OK on success, as the
 * other Dataverse API tests expect - confirm.
 */
private static void setupUsers() {
    // Regular (non-admin) account:
    Response createNormalUser = UtilIT.createRandomUser();
    assertEquals(OK.getStatusCode(), createNormalUser.getStatusCode());
    normalUserAPIKey = UtilIT.getApiTokenFromResponse(createNormalUser);

    // Account that is promoted to superuser:
    Response createAdminUser = UtilIT.createRandomUser();
    assertEquals(OK.getStatusCode(), createAdminUser.getStatusCode());
    String adminUsername = UtilIT.getUsernameFromResponse(createAdminUser);

    // The promotion result used to be stored in an unused local and never checked.
    Response makeSuperUser = UtilIT.makeSuperUser(adminUsername);
    assertEquals(OK.getStatusCode(), makeSuperUser.getStatusCode());

    adminUserAPIKey = UtilIT.getApiTokenFromResponse(createAdminUser);
}

// Creates a randomly-named collection (dataverse) as the admin user, stores
// its alias in harvestCollectionAlias and publishes it. Both API calls are
// asserted (CREATED for the creation, OK for the publication).
private static void setupCollection() {
Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey);
createDataverseResponse.prettyPrint();
assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode());

// The alias is needed both to publish the collection below and, later, as
// the "dataverseAlias" of the harvesting client in testHarvestingClientRun():
harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

private String normalUserAPIKey;
private String adminUserAPIKey;
// publish dataverse:
Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey);
assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode());
}

@Test
public void testCreateEditDeleteClient() {
setupUsers();
//setupUsers();
String nickName = UtilIT.getRandomString(6);


Expand All @@ -52,7 +85,7 @@ public void testCreateEditDeleteClient() {
+ "\"harvestUrl\":\"%s\","
+ "\"archiveUrl\":\"%s\","
+ "\"metadataFormat\":\"%s\"}",
harvestCollection, harvestUrl, archiveUrl, harvestMetadataFormat);
rootCollection, harvestUrl, archiveUrl, harvestMetadataFormat);


// Try to create a client as normal user, should fail:
Expand All @@ -61,7 +94,7 @@ public void testCreateEditDeleteClient() {
.header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
.body(clientJson)
.post(clientApiPath);
assertEquals(401, rCreate.getStatusCode());
assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode());


// Try to create the same as admin user, should succeed:
Expand All @@ -70,7 +103,7 @@ public void testCreateEditDeleteClient() {
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
.body(clientJson)
.post(clientApiPath);
assertEquals(201, rCreate.getStatusCode());
assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode());

// Try to update the client we have just created:

Expand All @@ -80,7 +113,7 @@ public void testCreateEditDeleteClient() {
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
.body(updateJson)
.put(clientApiPath);
assertEquals(200, rUpdate.getStatusCode());
assertEquals(OK.getStatusCode(), rUpdate.getStatusCode());

// Now let's retrieve the client we've just created and edited:

Expand All @@ -89,7 +122,7 @@ public void testCreateEditDeleteClient() {

logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode());
logger.info("getClient printresponse: " + getClientResponse.prettyPrint());
assertEquals(200, getClientResponse.getStatusCode());
assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());

// ... and validate the values:

Expand All @@ -98,7 +131,7 @@ public void testCreateEditDeleteClient() {
.body("data.type", equalTo("oai"))
.body("data.nickName", equalTo(nickName))
.body("data.archiveDescription", equalTo(archiveDescription))
.body("data.dataverseAlias", equalTo(harvestCollection))
.body("data.dataverseAlias", equalTo(rootCollection))
.body("data.harvestUrl", equalTo(harvestUrl))
.body("data.archiveUrl", equalTo(archiveUrl))
.body("data.metadataFormat", equalTo(harvestMetadataFormat));
Expand All @@ -109,14 +142,120 @@ public void testCreateEditDeleteClient() {
.header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
.delete(clientApiPath);
logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
assertEquals(401, rDelete.getStatusCode());
assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode());

// Try to delete as admin user should work:

rDelete = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
.delete(clientApiPath);
logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
assertEquals(200, rDelete.getStatusCode());
assertEquals(OK.getStatusCode(), rDelete.getStatusCode());
}

/**
 * Creates a harvesting client for the control OAI set, starts an actual
 * harvest through the /api/harvest/clients/{nickName}/run API, polls the
 * client until the run is no longer reported as "inProgress", and then
 * validates the reported result (status, timestamps, number of datasets).
 * The client is deleted at the end, including when a check fails.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping
 *         between two status polls
 */
@Test
public void testHarvestingClientRun() throws InterruptedException {
    // Setup: create the client via the API
    // since this API is tested somewhat extensively in the previous
    // method, we don't need to pay too much attention to this method, aside
    // from confirming the expected HTTP status code.

    String nickName = UtilIT.getRandomString(6);

    String clientApiPath = String.format(harvestClientsApi+"%s", nickName);
    String clientJson = String.format("{\"dataverseAlias\":\"%s\","
            + "\"type\":\"oai\","
            + "\"harvestUrl\":\"%s\","
            + "\"archiveUrl\":\"%s\","
            + "\"set\":\"%s\","
            + "\"metadataFormat\":\"%s\"}",
            harvestCollectionAlias, harvestUrl, archiveUrl, controlOaiSet, harvestMetadataFormat);

    Response createResponse = given()
            .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
            .body(clientJson)
            .post(clientApiPath);
    assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode());

    // From here on the client exists on the server, so everything else runs
    // inside try/finally: a failed check must not leave the client behind.
    boolean harvestVerified = false;
    try {
        // API TEST 1. Run the harvest using the configuration (client) we have
        // just created

        String runHarvestApiPath = String.format(harvestClientsApi+"%s/run", nickName);

        // TODO? - verify that a non-admin user cannot perform this operation.
        // NOTE(review): for an authenticated non-superuser the run API looks
        // like it answers 403 (FORBIDDEN) rather than 401 - confirm.

        Response runResponse = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .post(runHarvestApiPath);
        assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode());

        // API TEST 2. As indicated by the ACCEPTED status code above, harvesting
        // is an asynchronous operation that will be performed in the background.
        // Verify that this "in progress" status is properly reported while it's
        // running, and that it completes in some reasonable amount of time.

        int maxWait = 20; // a very conservative interval; this harvest has no business taking this long
        int secondsWaited = 0;
        boolean harvestCompleted = false;

        while (!harvestCompleted && secondsWaited < maxWait) {
            // Sleep BEFORE each poll, including the first one.
            // NOTE(review): the run request presumably returns before the
            // background job has marked the client as "inProgress"; polling
            // immediately could then read the pre-harvest state - confirm.
            Thread.sleep(1000L);
            secondsWaited++;

            // keep checking the status of the client with the GET api:
            Response getClientResponse = given()
                    .get(clientApiPath);

            assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());
            assertEquals(AbstractApiBean.STATUS_OK, getClientResponse.body().jsonPath().getString("status"));

            // Log at the same level the guard checks (it used to test FINE
            // but log at INFO, under a label copied from another test).
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("getClientResponse.prettyPrint: "
                        + getClientResponse.prettyPrint());
            }

            String clientStatus = getClientResponse.body().jsonPath().getString("data.status");
            assertNotNull(clientStatus);

            if (!"inProgress".equals(clientStatus)) {
                // Check the values in the response:
                // a) Confirm that the harvest has completed:
                assertEquals("Unexpected client status: "+clientStatus, "inActive", clientStatus);

                // b) Confirm that it has actually succeeded:
                assertEquals("Last harvest not reported a success", "SUCCESS", getClientResponse.body().jsonPath().getString("data.lastResult"));
                String harvestTimeStamp = getClientResponse.body().jsonPath().getString("data.lastHarvest");
                assertNotNull(harvestTimeStamp);

                // c) Confirm that the other timestamps match:
                assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastSuccessful"));
                assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastNonEmpty"));

                // d) Confirm that the correct number of datasets have been harvested:
                assertEquals(datasetsInControlSet, getClientResponse.body().jsonPath().getInt("data.lastDatasetsHarvested"));

                // ok, it looks like the harvest has completed successfully.
                harvestCompleted = true;
            }
            // otherwise the harvest is still running; we'll sleep for another second
        }

        logger.info("Waited " + secondsWaited + " seconds for the harvest to complete.");

        // Fail if it hasn't completed in maxWait seconds
        assertTrue("Harvest still in progress after " + maxWait + " seconds", harvestCompleted);

        // TODO: use the native Dataverses/Datasets apis to verify that the expected
        // datasets have been harvested.

        harvestVerified = true;
    } finally {
        // Cleanup: delete the client
        Response deleteResponse = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .delete(clientApiPath);
        logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());

        // Only assert on the cleanup when the test itself passed; otherwise a
        // failing delete would mask the original assertion failure.
        if (harvestVerified) {
            assertEquals(OK.getStatusCode(), deleteResponse.getStatusCode());
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -873,4 +873,12 @@ public void testMultiRecordOaiSet() throws InterruptedException {
logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());
assertEquals("Failed to delete the control multi-record set", 200, deleteResponse.getStatusCode());
}

// TODO:
// What else can we test?
// Some ideas:
// - Test handling of deleted dataset records
// - Test "from" and "until" time parameters
// - Test validating full verb response records against XML schema
// (for each supported metadata format, possibly?)
}

0 comments on commit 395d605

Please sign in to comment.