-
Notifications
You must be signed in to change notification settings - Fork 490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Iqss/7493 batch archiving api call #7494
Changes from all commits
6eae5e4
8313404
70d923a
96d3723
cb9f374
76e2396
2e8d990
b796833
006a4ba
bba8ba0
011c97a
1a1c28c
8a0ad71
7b5aead
fd32dfd
805ff95
e1415f9
ef9a0b9
9443e04
242befa
7047d00
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,7 @@ | |
import javax.json.JsonArrayBuilder; | ||
import javax.json.JsonObjectBuilder; | ||
import javax.ws.rs.DELETE; | ||
import javax.ws.rs.DefaultValue; | ||
import javax.ws.rs.GET; | ||
import javax.ws.rs.POST; | ||
import javax.ws.rs.PUT; | ||
|
@@ -1773,6 +1774,93 @@ public void run() { | |
} | ||
} | ||
|
||
|
||
/** | ||
* Iteratively archives all unarchived dataset versions | ||
* @param | ||
* listonly - don't archive, just list unarchived versions | ||
* limit - max number to process | ||
* lastestonly - only archive the latest versions | ||
* @return | ||
*/ | ||
@GET | ||
@Path("/archiveAllUnarchivedDataVersions") | ||
public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { | ||
|
||
try { | ||
AuthenticatedUser au = findAuthenticatedUserOrDie(); | ||
// Note - the user is being set in the session so it becomes part of the | ||
// DataverseRequest and is sent to the back-end command where it is used to get | ||
// the API Token which is then used to retrieve files (e.g. via S3 direct | ||
// downloads) to create the Bag | ||
session.setUser(au); | ||
List<DatasetVersion> dsl = datasetversionService.getUnarchivedDatasetVersions(); | ||
if (dsl != null) { | ||
if (listonly) { | ||
JsonArrayBuilder jab = Json.createArrayBuilder(); | ||
logger.info("Unarchived versions found: "); | ||
int current = 0; | ||
for (DatasetVersion dv : dsl) { | ||
if (limit != null && current >= limit) { | ||
break; | ||
} | ||
if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { | ||
jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); | ||
logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); | ||
current++; | ||
} | ||
} | ||
return ok(jab); | ||
} | ||
String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); | ||
AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); | ||
|
||
if (cmd != null) { | ||
new Thread(new Runnable() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not personally very familiar with threads but there do seem to be a couple other places in the code (also added by Jim) that use this new Thread/new Runnable pattern and work fine. I think Jakarta EE offers various ways of handling threads and I know we use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! FWIW: I don't know details but I think this is basically an EJB vs plain Java difference. Using @Asyncrhonous starts a thread in a managed pool (and I think there's a ManagedExcutorService that would allow you to specify a separate pool that could be managed in the EJB config). Probably would be good to recommend Asynch and think about updating these at some point. That said, since this call just starts one thread to serially chunk through things so I don't think it should cause problems as is (and having others who know more weigh in before making a change might be useful). |
||
public void run() { | ||
int total = dsl.size(); | ||
int successes = 0; | ||
int failures = 0; | ||
for (DatasetVersion dv : dsl) { | ||
if (limit != null && (successes + failures) >= limit) { | ||
break; | ||
} | ||
if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { | ||
try { | ||
AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); | ||
|
||
dv = commandEngine.submit(cmd); | ||
if (dv.getArchivalCopyLocation() != null) { | ||
successes++; | ||
logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " | ||
+ dv.getArchivalCopyLocation()); | ||
} else { | ||
failures++; | ||
logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); | ||
} | ||
} catch (CommandException ex) { | ||
failures++; | ||
logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); | ||
} | ||
} | ||
logger.fine(successes + failures + " of " + total + " archive submissions complete"); | ||
} | ||
logger.info("Archiving complete: " + successes + " Successes, " + failures + " Failures. See prior log messages for details."); | ||
} | ||
}).start(); | ||
return ok("Archiving all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions. View log and/or check archive for results."); | ||
} else { | ||
logger.log(Level.SEVERE, "Could not find Archiver class: " + className); | ||
return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); | ||
} | ||
} else { | ||
return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); | ||
} | ||
} catch (WrappedResponse e1) { | ||
return error(Status.UNAUTHORIZED, "api key required"); | ||
} | ||
} | ||
|
||
@DELETE | ||
@Path("/clearMetricsCache") | ||
public Response clearMetricsCache() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At first I thought this anchor was being used and references to it should be changed, but apparently it isn't. It sort of makes me wonder if we should delete it if it isn't being used. No strong preference.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah - it seems random as to whether anchors exist or not. They can be useful beyond just internal links (e.g. you can use them to point people directly to that section via URL in email, etc.), so having anchors might be a good default for any significant topic. That said, it's not a show stopper if it gets deleted.