diff --git a/Readme.md b/Readme.md index f42d0422..14f90d1d 100644 --- a/Readme.md +++ b/Readme.md @@ -109,6 +109,10 @@ __Important Note__: this Docker is a way to test and play with the biblio-glutto - match record by ISTEX ID - `GET host:port/service/lookup?istexid=ISTEXID` - `GET host:port/service/lookup/istexid/{ISTEXID}` + +- match record by Elsevier ID + - `GET host:port/service/lookup?pii=PII` + - `GET host:port/service/lookup/pii/{PII}` - match record by article title and first author lastname - `GET host:port/service/lookup?atitle=ARTICLE_TITLE&firstAuthor=FIRST_AUTHOR_SURNAME[?postValidate=true]` @@ -141,6 +145,7 @@ In case you are only interested by the Open Access URL for a bibliographical obj - `GET host:port/service/oa?doi=DOI` return the best Open Accss PDF url for a given DOI - `GET host:port/service/oa?pmid=PMID` return the best Open Accss PDF url for a given PMID - `GET host:port/service/oa?pmc=PMC` return the best Open Accss PDF url for a given PMC ID + - `GET host:port/service/oa?pii=PII` return the best Open Access PDF url for a given Elsevier ID ### cURL examples @@ -178,6 +183,12 @@ Bibliographical metadata lookup by PMC ID (note that the `PMC` prefix in the ide curl http://localhost:8080/service/lookup?pmc=PMC1017419 ``` +Bibliographical metadata lookup by Elsevier ID: + +```sh +curl http://localhost:8080/service/lookup?pii=PII +``` + Bibliographical metadata lookup by ISTEX ID: ```sh @@ -190,6 +201,8 @@ Open Access resolver by DOI: curl "http://localhost:8080/service/oa?doi=10.1038/nature12373" ``` + + ## Building the bibliographical data look-up and matching databases ### Architecture diff --git a/lookup/notes.md b/lookup/notes.md index 61f3ebaf..766893fa 100644 --- a/lookup/notes.md +++ b/lookup/notes.md @@ -86,7 +86,7 @@ Here a brief description of the API: - match IDs (istexID, ark, etc...) by DOI: - `GET host:port/service/lookup/istex/id?istexid=ISTEXID` for a given ISTEX ID returns the mapping IDs: ark, pmid, etc.. 
- `GET host:port/service/lookup/istex/id/{ISTEXID}` for a given ISTEX ID returns the mapping IDs: ark, pmid, etc.. - + - Open Access API returns the OA pdf link by doi: - `GET host:port/service/lookup/oa/url?doi=doi` return the best Open Accss PDF url for a given DOI @@ -150,6 +150,10 @@ For simplification, the API only does look-up of full metadata records (crossref - match record by PMC ID - `GET host:port/service/lookup?pmc=PMC` - `GET host:port/service/lookup/pmc/{PMC}` + + - match record by Elsevier ID + - `GET host:port/service/lookup?pii=PII` + - `GET host:port/service/lookup/pii/{PII}` - match record by ISTEX ID - `GET host:port/service/lookup?istexid=ISTEXID` @@ -175,6 +179,7 @@ Open Access API returns the OA pdf link (url) by identifier: - `GET host:port/service/oa?doi=DOI` return the best Open Accss PDF url for a given DOI - `GET host:port/service/oa?pmid=PMID` return the best Open Accss PDF url for a given PMID - `GET host:port/service/oa?pmc=PMC` return the best Open Accss PDF url for a given PMC ID + - `GET host:port/service/oa?pii=PII` return the best Open Access PDF url for a given Elsevier ID diff --git a/lookup/src/main/java/com/scienceminer/lookup/data/IstexData.java b/lookup/src/main/java/com/scienceminer/lookup/data/IstexData.java index b9085278..b9cfc14d 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/data/IstexData.java +++ b/lookup/src/main/java/com/scienceminer/lookup/data/IstexData.java @@ -1,11 +1,8 @@ package com.scienceminer.lookup.data; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - import java.io.Serializable; import java.util.List; -@JsonIgnoreProperties({"pii"}) public class IstexData implements Serializable { private String corpusName; @@ -21,6 +18,8 @@ public class IstexData implements Serializable { private List mesh; + private List pii; + public String getIstexId() { return istexId; } @@ -76,4 +75,12 @@ public List getMesh() { public void setMesh(List mesh) { this.mesh = mesh; } + + public List 
getPii() { + return pii; + } + + public void setPii(List pii) { + this.pii = pii; + } } diff --git a/lookup/src/main/java/com/scienceminer/lookup/storage/DataEngine.java b/lookup/src/main/java/com/scienceminer/lookup/storage/DataEngine.java index 71e8bf34..14d72b13 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/storage/DataEngine.java +++ b/lookup/src/main/java/com/scienceminer/lookup/storage/DataEngine.java @@ -68,7 +68,12 @@ public List> retrieveCrossrefRecords(Integer total) { return metadataLookup.retrieveList(total); } + public List> retrieveIstexRecords_piiToIds(Integer total) { + return istexLookup.retrieveList_piiToIds(total); + } + //Setters + protected void setOaDoiLookup(OALookup oaDoiLookup) { this.oaDoiLookup = oaDoiLookup; } @@ -84,5 +89,4 @@ protected void setMetadataLookup(MetadataLookup metadataLookup) { protected void setPmidLookup(PMIdsLookup pmidLookup) { this.pmidLookup = pmidLookup; } - } diff --git a/lookup/src/main/java/com/scienceminer/lookup/storage/LookupEngine.java b/lookup/src/main/java/com/scienceminer/lookup/storage/LookupEngine.java index 94479053..a8e5ff1f 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/storage/LookupEngine.java +++ b/lookup/src/main/java/com/scienceminer/lookup/storage/LookupEngine.java @@ -212,6 +212,24 @@ public String retrieveByIstexid(String istexid, Boolean postValidate, String fir throw new NotFoundException("Cannot find bibliographical record with ISTEX ID " + istexid); } + public String retrieveByPii(String pii, Boolean postValidate, String firstAuthor, String atitle) { + final IstexData istexData = istexLookup.retrieveByPii(pii); + + if (istexData != null && CollectionUtils.isNotEmpty(istexData.getDoi()) && isNotBlank(istexData.getDoi().get(0))) { + final String doi = istexData.getDoi().get(0); + MatchingDocument outputData = metadataLookup.retrieveByMetadata(doi); + + outputData = validateJsonBody(postValidate, firstAuthor, atitle, outputData); + //return 
injectIdsByIstexData(outputData.getJsonObject(), doi, istexData); + + final String oaLink = oaDoiLookup.retrieveOALinkByDoi(doi); + return injectIdsByIstexData(outputData.getJsonObject(), doi, istexData, oaLink); + } + + throw new NotFoundException("Cannot find bibliographical record by PII " + pii); + } + + // Intermediate lookups public PmidData retrievePMidsByDoi(String doi) { @@ -265,6 +283,17 @@ public String retrieveOAUrlByPmc(String pmc) { throw new NotFoundException("Open Access URL was not found for PM ID " + pmc); } + public String retrieveOAUrlByPii(String pii) { + final IstexData istexData = istexLookup.retrieveByPii(pii); + + if (istexData != null && CollectionUtils.isNotEmpty(istexData.getDoi())) { + return oaDoiLookup.retrieveOALinkByDoi(istexData.getDoi().get(0)); + } + + throw new NotFoundException("Open Access URL was not found for pii " + pii); + } + + public String retrieveByBiblio(String biblio) { final MatchingDocument outputData = metadataMatching.retrieveByBiblio(biblio); return injectIdsByDoi(outputData.getJsonObject(), outputData.getDOI()); @@ -464,6 +493,15 @@ protected String injectIdsByIstexData(String jsonobj, String doi, IstexData iste sb.append("\"mesh\":\"" + istexData.getMesh().get(0) + "\""); foundIstexData = true; } + if (CollectionUtils.isNotEmpty(istexData.getPii())) { + if (!first) { + sb.append(", "); + } else { + first = false; + } + sb.append("\"pii\":\"" + istexData.getPii().get(0) + "\""); + foundIstexData = true; + } } if (!pmid || !pmc) { diff --git a/lookup/src/main/java/com/scienceminer/lookup/storage/lookup/IstexIdsLookup.java b/lookup/src/main/java/com/scienceminer/lookup/storage/lookup/IstexIdsLookup.java index 7d22372d..2c797f2b 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/storage/lookup/IstexIdsLookup.java +++ b/lookup/src/main/java/com/scienceminer/lookup/storage/lookup/IstexIdsLookup.java @@ -1,6 +1,7 @@ package com.scienceminer.lookup.storage.lookup; import com.codahale.metrics.Meter; +import 
com.google.inject.servlet.ServletScopes; import com.scienceminer.lookup.data.IstexData; import com.scienceminer.lookup.exception.ServiceOverloadedException; import com.scienceminer.lookup.reader.IstexIdsReader; @@ -26,9 +27,10 @@ import static org.apache.commons.lang3.StringUtils.lowerCase; /** - * Lookup: + * Lookups: * - doi -> istex ID, pmid, ark, etc... * - istexID -> doi, pmid, ark, etc... + * - pii -> doi, istex ID, pmid, ark, etc... */ public class IstexIdsLookup { @@ -37,11 +39,13 @@ public class IstexIdsLookup { protected Env environment; protected Dbi dbDoiToIds; protected Dbi dbIstexToIds; + protected Dbi dbPiiToIds; public static final String ENV_NAME = "istex"; public static final String NAME_DOI2IDS = ENV_NAME + "_doi2ids"; public static final String NAME_ISTEX2IDS = ENV_NAME + "_istex2ids"; + public static final String NAME_PII2IDS = ENV_NAME + "_pii2ids"; private final int batchSize; @@ -51,6 +55,7 @@ public IstexIdsLookup(StorageEnvFactory storageEnvFactory) { dbDoiToIds = this.environment.openDbi(NAME_DOI2IDS, DbiFlags.MDB_CREATE); dbIstexToIds = this.environment.openDbi(NAME_ISTEX2IDS, DbiFlags.MDB_CREATE); + dbPiiToIds = this.environment.openDbi(NAME_PII2IDS, DbiFlags.MDB_CREATE); } public void loadFromFile(InputStream is, IstexIdsReader reader, Meter metric) { @@ -72,11 +77,19 @@ public void loadFromFile(InputStream is, IstexIdsReader reader, Meter metric) { } } + // unwrapping list of pii pii -> ids + for (String pii : istexData.getPii()) { + if (isNotBlank(pii)) { + store(dbPiiToIds, lowerCase(pii), istexData, transactionWrapper.tx); + } + } + // istex id -> ids (no need to unwrap) if (isNotBlank(istexData.getIstexId())) { store(dbIstexToIds, istexData.getIstexId(), istexData, transactionWrapper.tx); } + metric.mark(); counter.incrementAndGet(); } @@ -128,6 +141,7 @@ public Map getSize() { try (final Txn txn = this.environment.txnRead()) { size.put(NAME_DOI2IDS, dbDoiToIds.stat(txn).entries); size.put(NAME_ISTEX2IDS, 
dbIstexToIds.stat(txn).entries); + size.put(NAME_PII2IDS, dbPiiToIds.stat(txn).entries); } catch (Env.ReadersFullException e) { throw new ServiceOverloadedException("Not enough readers for LMDB access, increase them or reduce the parallel request rate. ", e); } @@ -186,7 +200,25 @@ record = (IstexData) BinarySerialiser.deserialize(cachedData); } return record; + } + + public IstexData retrieveByPii(String pii) { + final ByteBuffer keyBuffer = allocateDirect(environment.getMaxKeySize()); + ByteBuffer cachedData = null; + IstexData record = null; + try (Txn tx = environment.txnRead()) { + keyBuffer.put(BinarySerialiser.serialize(lowerCase(pii))).flip(); + cachedData = dbPiiToIds.get(tx, keyBuffer); + if (cachedData != null) { + record = (IstexData) BinarySerialiser.deserialize(cachedData); + } + } catch (Env.ReadersFullException e) { + throw new ServiceOverloadedException("Not enough readers for LMDB access, increase them or reduce the parallel request rate. ", e); + } catch (Exception e) { + LOGGER.error("Cannot retrieve ISTEX identifiers by pii: " + pii, e); + } + return record; } public List> retrieveList_doiToIds(Integer total) { @@ -194,6 +226,10 @@ public List> retrieveList_doiToIds(Integer total) { } + public List> retrieveList_piiToIds(Integer total) { + return retrieveList(total, dbPiiToIds); + } + public List> retrieveList_istexToIds(Integer total) { return retrieveList(total, dbIstexToIds); } diff --git a/lookup/src/main/java/com/scienceminer/lookup/web/resource/DataController.java b/lookup/src/main/java/com/scienceminer/lookup/web/resource/DataController.java index 1e678241..26fdb07e 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/web/resource/DataController.java +++ b/lookup/src/main/java/com/scienceminer/lookup/web/resource/DataController.java @@ -61,6 +61,13 @@ public List> getIstexData_doiToIds(@QueryParam("total") public List> getIstexData_istexIdToIds(@QueryParam("total") Integer total) { return 
storage.retrieveIstexRecords_istexToIds(total); } + + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/istex/pii") + public List> getIstexData_istexpiiToIds(@QueryParam("total") Integer total) { + return storage.retrieveIstexRecords_piiToIds(total); + } @GET @Produces(MediaType.APPLICATION_JSON) diff --git a/lookup/src/main/java/com/scienceminer/lookup/web/resource/LookupController.java b/lookup/src/main/java/com/scienceminer/lookup/web/resource/LookupController.java index 6d306cf2..b386f2f1 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/web/resource/LookupController.java +++ b/lookup/src/main/java/com/scienceminer/lookup/web/resource/LookupController.java @@ -64,6 +64,7 @@ public void getByQueryAsync( @QueryParam("doi") String doi, @QueryParam("pmid") String pmid, @QueryParam("pmc") String pmc, + @QueryParam("pii") String pii, @QueryParam("istexid") String istexid, @QueryParam("firstAuthor") String firstAuthor, @QueryParam("atitle") String atitle, @@ -95,7 +96,7 @@ public void getByQueryAsync( if (postValidate == null) postValidate = Boolean.TRUE; if (parseReference == null) parseReference = Boolean.TRUE; - getByQuery(doi, pmid, pmc, istexid, firstAuthor, atitle, + getByQuery(doi, pmid, pmc, pii, istexid, firstAuthor, atitle, postValidate, jtitle, volume, firstPage, biblio, parseReference, asyncResponse); } @@ -103,6 +104,7 @@ protected void getByQuery( String doi, String pmid, String pmc, + String pii, String istexid, String firstAuthor, String atitle, @@ -163,6 +165,20 @@ protected void getByQuery( } } + if (isNotBlank(pii)) { + areParametersEnoughToLookup = true; + try { + final String response = lookupEngine.retrieveByPii(pii, postValidate, firstAuthor, atitle); + if (isNotBlank(response)) { + asyncResponse.resume(response); + return; + } + + } catch (NotFoundException e) { + LOGGER.warn("PII ID did not matched, move to additional metadata"); + } + } + if (isNotBlank(istexid)) { areParametersEnoughToLookup = true; try { @@ -364,6 +380,13 @@ 
public String getByPmid(@PathParam("pmid") String pmid) { return lookupEngine.retrieveByPmid(pmid, false, null, null); } + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/pii/{pii}") + public String getByPii(@PathParam("pii") String pii) { + return lookupEngine.retrieveByPii(pii, false, null, null); + } + @GET @Produces(MediaType.APPLICATION_JSON) @Path("/pmc/{pmc}") diff --git a/lookup/src/main/java/com/scienceminer/lookup/web/resource/OAController.java b/lookup/src/main/java/com/scienceminer/lookup/web/resource/OAController.java index 15de668e..24b61da6 100644 --- a/lookup/src/main/java/com/scienceminer/lookup/web/resource/OAController.java +++ b/lookup/src/main/java/com/scienceminer/lookup/web/resource/OAController.java @@ -36,7 +36,8 @@ public OAController(LookupConfiguration configuration, StorageEnvFactory storage public OAResource getDoiByMetadataDoi( @QueryParam("doi") String doi, @QueryParam("pmid") String pmid, - @QueryParam("pmc") String pmc + @QueryParam("pmc") String pmc, + @QueryParam("pii") String pii ) { if (isNotBlank(doi)) { @@ -51,6 +52,10 @@ public OAResource getDoiByMetadataDoi( return new OAResource(storage.retrieveOAUrlByPmc(pmc)); } + if (isNotBlank(pii)) { + return new OAResource(storage.retrieveOAUrlByPii(pii)); + } + throw new ServiceException(400, "The supplied parameters were not sufficient to select the query"); } @@ -75,4 +80,11 @@ public OAResource getDoiByMetadataPmid(@PathParam("pmid") String pmid) { public OAResource getDoiByMetadataPmc(@PathParam("pmc") String pmc) { return new OAResource(storage.retrieveOAUrlByPmc(pmc)); } + + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/pii/{pii}") + public OAResource getDoiByMetadataPii(@PathParam("pii") String pii) { + return new OAResource(storage.retrieveOAUrlByPii(pii)); + } } diff --git a/lookup/src/test/java/com/scienceminer/lookup/reader/IstexIdsReaderTest.java b/lookup/src/test/java/com/scienceminer/lookup/reader/IstexIdsReaderTest.java index 606f808e..afe79249 100644 
--- a/lookup/src/test/java/com/scienceminer/lookup/reader/IstexIdsReaderTest.java +++ b/lookup/src/test/java/com/scienceminer/lookup/reader/IstexIdsReaderTest.java @@ -21,7 +21,7 @@ public void setUp() { @Test public void test() throws Exception { - IstexData metadata = target.fromJson("{\"corpusName\":\"bmj\",\"istexId\":\"052DFBD14E0015CA914E28A0A561675D36FFA2CC\",\"ark\":[\"ark:/67375/NVC-W015BZV5-Q\"],\"doi\":[\"10.1136/sti.53.1.56\"],\"pmid\":[\"557360\"],\"pii\":[]}"); + IstexData metadata = target.fromJson("{\"corpusName\":\"bmj\",\"istexId\":\"052DFBD14E0015CA914E28A0A561675D36FFA2CC\",\"ark\":[\"ark:/67375/NVC-W015BZV5-Q\"],\"doi\":[\"10.1136/sti.53.1.56\"],\"pmid\":[\"557360\"],\"pii\":[\"123\"]}"); assertThat(metadata, is(not(nullValue()))); assertThat(metadata.getCorpusName(), is("bmj")); @@ -34,6 +34,9 @@ public void test() throws Exception { assertThat(metadata.getPmid(), hasSize(1)); assertThat(metadata.getPmid().get(0), is("557360")); + + assertThat(metadata.getPii(), hasSize(1)); + assertThat(metadata.getPii().get(0), is("123")); } @Test diff --git a/lookup/src/test/java/com/scienceminer/lookup/web/resource/LookupControllerTest.java b/lookup/src/test/java/com/scienceminer/lookup/web/resource/LookupControllerTest.java index df13d8c5..9942c13d 100644 --- a/lookup/src/test/java/com/scienceminer/lookup/web/resource/LookupControllerTest.java +++ b/lookup/src/test/java/com/scienceminer/lookup/web/resource/LookupControllerTest.java @@ -64,7 +64,7 @@ public void getByQuery_DOIexists_passingPostValidation_shouldReturnJSONBody() { expect(mockedAsyncResponse.resume(response.getJsonObject())).andReturn(true); replay(mockMetadataLookup, mockedAsyncResponse, mockPmidsLookup, mockOALookup, mockIstexLookup, mockMetadataMatching); - target.getByQuery(myDOI, null, null, null, firstAuthor, atitle, + target.getByQuery(myDOI, null, null, null, null, firstAuthor, atitle, postValidate, null, null, null, null, null, mockedAsyncResponse); verify(mockMetadataLookup, 
mockedAsyncResponse, mockPmidsLookup, mockOALookup, mockIstexLookup, mockMetadataMatching); @@ -96,7 +96,7 @@ public void getByQuery_DOIexists_NotPassingPostValidation_shouldReturnJSONFromTi // expect(mockedAsyncResponse.resume(response)).andReturn(true); replay(mockMetadataLookup, mockedAsyncResponse, mockPmidsLookup, mockOALookup, mockIstexLookup, mockMetadataMatching); - target.getByQuery(myDOI, null, null, null, firstAuthor, atitle, + target.getByQuery(myDOI, null, null, null, null, firstAuthor, atitle, postValidate, null, null, null, null, null, mockedAsyncResponse); verify(mockMetadataLookup, mockedAsyncResponse, mockPmidsLookup, mockOALookup, mockIstexLookup, mockMetadataMatching); @@ -120,7 +120,7 @@ public void getByQuery_DOIexists_WithPostvalidation_shouldReturnJSONFromTitleFir mockMetadataMatching.retrieveByMetadataAsync(eq(atitle), eq(firstAuthor), anyObject()); replay(mockMetadataLookup, mockedAsyncResponse, mockPmidsLookup, mockOALookup, mockIstexLookup, mockMetadataMatching); - target.getByQuery(myDOI, null, null, null, firstAuthor, atitle, + target.getByQuery(myDOI, null, null, null, null, firstAuthor, atitle, postValidate, null, null, null, null, null, mockedAsyncResponse); verify(mockMetadataLookup, mockedAsyncResponse, mockPmidsLookup, mockOALookup, mockIstexLookup, mockMetadataMatching);