diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java index d5e36f8e9..b85e6e168 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java @@ -1,28 +1,24 @@ package org.alliancegenome.curation_api.jobs.executors; -import static org.alliancegenome.curation_api.services.DataProviderService.RESOURCE_DESCRIPTOR_PREFIX; - import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; -import java.util.Objects; -import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.entities.DataProvider; -import org.alliancegenome.curation_api.model.entities.Organization; -import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkURLLoad; -import org.alliancegenome.curation_api.services.DataProviderService; -import org.alliancegenome.curation_api.services.OrganizationService; -import org.alliancegenome.curation_api.services.ResourceDescriptorPageService; +import org.alliancegenome.curation_api.services.GeneService; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; -import org.jetbrains.annotations.NotNull; +import org.apache.commons.collections.CollectionUtils; import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import io.quarkus.logging.Log; import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; @@ -30,11 +26,7 @@ public class ExpressionAtlasExecutor extends LoadFileExecutor { @Inject - DataProviderService service; - @Inject - ResourceDescriptorPageService resourceDescriptorPageService; - @Inject - OrganizationService organizationService; + GeneService geneService; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException { @@ -50,53 +42,61 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException String name = bulkLoadFileHistory.getBulkLoad().getName(); String dataProviderName = name.substring(0, name.indexOf(" ")); + + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - Organization organization = organizationService.getByAbbr(dataProviderName).getEntity(); - ResourceDescriptorPage ensemblGenePage = resourceDescriptorPageService.getPageForResourceDescriptor("ENSEMBL", "expression_atlas"); - - List dataProviderIdsBefore = - new ArrayList<>(service.getDataProviderMap(organization, ensemblGenePage).values().stream().map(DataProvider::getId).toList()); - dataProviderIdsBefore.removeIf(Objects::isNull); - - List dataProviderIdsLoaded = new ArrayList<>(); - ProcessDisplayHelper ph = new ProcessDisplayHelper(); - ph.addDisplayHandler(loadProcessDisplayService); - ph.startProcess(name, accessions.size()); - accessions.forEach(accession -> { - CrossReference reference = getCrossReference(ensemblGenePage, accession, organization); - DataProvider provider = new DataProvider(); - provider.setSourceOrganization(organization); - provider.setCrossReference(reference); - DataProvider entity = service.insertExpressionAtlasDataProvider(provider).getEntity(); - if (entity != null) { - dataProviderIdsLoaded.add(entity.getId()); - bulkLoadFileHistory.incrementCompleted(); - } else { - bulkLoadFileHistory.incrementSkipped(); - } - ph.progressProcess(); - }); - bulkLoadFileHistory.setTotalCount(accessions.size()); - runCleanup(service, bulkLoadFileHistory, dataProviderName, dataProviderIdsBefore, dataProviderIdsLoaded, "Atlas Load Type"); - ph.finishProcess(); - updateHistory(bulkLoadFileHistory); + runLoad(bulkLoadFileHistory, dataProvider, accessions); bulkLoadFileHistory.finishLoad(); updateHistory(bulkLoadFileHistory); updateExceptions(bulkLoadFileHistory); } - - @NotNull - private static CrossReference getCrossReference(ResourceDescriptorPage ensemblGenePage, String accession, Organization organization) { - CrossReference reference = new CrossReference(); - if (List.of("FB", "SGD").contains(organization.getAbbreviation())) { - reference.setReferencedCurie(accession); - } else { - reference.setReferencedCurie(RESOURCE_DESCRIPTOR_PREFIX + ":" + accession); + + private void runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List identifiers) { + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.addDisplayHandler(loadProcessDisplayService); + if (CollectionUtils.isNotEmpty(identifiers)) { + String loadMessage = "Expression Atlas cross-reference update"; + if (dataProvider != null) { + loadMessage = loadMessage + " for " + dataProvider.name(); + } + ph.startProcess(loadMessage, identifiers.size()); + + history.setCount(identifiers.size()); + updateHistory(history); + + for (String identifier : identifiers) { + try { + geneService.addExpressionAtlasXref(identifier, dataProvider); + history.incrementCompleted(); + } catch (ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (KnownIssueValidationException e) { + Log.debug(e.getMessage()); + history.incrementSkipped(); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(identifier, e.getMessage(), e.getStackTrace())); + } + if (history.getErrorRate() > 0.25) { + Log.error("Failure Rate > 25% aborting load"); + updateHistory(history); + updateExceptions(history); + failLoadAboveErrorRateCutoff(history); + return; + } + ph.progressProcess(); + if (Thread.currentThread().isInterrupted()) { + Log.info("Thread Interrupted:"); + break; + } + } + updateHistory(history); + updateExceptions(history); + ph.finishProcess(); } - reference.setDisplayName(accession); - reference.setResourceDescriptorPage(ensemblGenePage); - return reference; } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java b/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java index a41be0708..42e4427a7 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java @@ -77,18 +77,20 @@ public List getUpdatedXrefList(List incomingXref List finalXrefs = new ArrayList<>(); List addedXrefUniqueIds = new ArrayList<>(); + List combinedXrefs = new ArrayList<>(); + combinedXrefs.addAll(incomingXrefs); if (keepAllExisting && CollectionUtils.isNotEmpty(existingXrefs)) { - incomingXrefs.addAll(existingXrefs); + combinedXrefs.addAll(existingXrefs); } - if (CollectionUtils.isNotEmpty(incomingXrefs)) { - for (CrossReference incomingXref : incomingXrefs) { - String incomingXrefUniqueId = getCrossReferenceUniqueId(incomingXref); + if (CollectionUtils.isNotEmpty(combinedXrefs)) { + for (CrossReference xref : combinedXrefs) { + String incomingXrefUniqueId = getCrossReferenceUniqueId(xref); if (!addedXrefUniqueIds.contains(incomingXrefUniqueId)) { if (existingXrefMap.containsKey(incomingXrefUniqueId)) { - finalXrefs.add(updateCrossReference(existingXrefMap.get(incomingXrefUniqueId), incomingXref)); + finalXrefs.add(updateCrossReference(existingXrefMap.get(incomingXrefUniqueId), xref)); } else { - finalXrefs.add(crossReferenceDAO.persist(incomingXref)); + finalXrefs.add(crossReferenceDAO.persist(xref)); } addedXrefUniqueIds.add(incomingXrefUniqueId); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java index f19f2e68f..e29933a36 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java @@ -1,32 +1,26 @@ package org.alliancegenome.curation_api.services; -import jakarta.annotation.PostConstruct; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; -import jakarta.transaction.Transactional; import org.alliancegenome.curation_api.auth.AuthenticatedUser; -import org.alliancegenome.curation_api.dao.*; -import org.alliancegenome.curation_api.model.entities.*; +import org.alliancegenome.curation_api.dao.CrossReferenceDAO; +import org.alliancegenome.curation_api.dao.DataProviderDAO; +import org.alliancegenome.curation_api.dao.GeneDAO; +import org.alliancegenome.curation_api.dao.OrganizationDAO; +import org.alliancegenome.curation_api.dao.SpeciesDAO; +import org.alliancegenome.curation_api.model.entities.AllianceMember; +import org.alliancegenome.curation_api.model.entities.DataProvider; +import org.alliancegenome.curation_api.model.entities.Person; import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.alliancegenome.curation_api.services.validation.DataProviderValidator; -import org.jetbrains.annotations.NotNull; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; @RequestScoped public class DataProviderService extends BaseEntityCrudService { - public static final String RESOURCE_DESCRIPTOR_PREFIX = "ENSEMBL"; - public static final String RESOURCE_DESCRIPTOR_PAGE_NAME = "default"; - // - Map accessionGeneMap = new HashMap<>(); - HashMap dataProviderMap = new HashMap<>(); - - @Inject @AuthenticatedUser protected Person authenticatedPerson; @@ -70,77 +64,6 @@ public DataProvider getDefaultDataProvider(String sourceOrganizationAbbreviation return dataProviderDAO.getOrCreateDataProvider(organizationDAO.getOrCreateOrganization(sourceOrganizationAbbreviation)); } - @Transactional - public ObjectResponse insertExpressionAtlasDataProvider(DataProvider entity) { - String referencedCurie = entity.getCrossReference().getReferencedCurie(); - // find associated gene - Long geneID = getAssociatedGeneId(referencedCurie, entity.getSourceOrganization()); - // if no gene found skip (= don't import) the accession - if (geneID == null) { - return new ObjectResponse<>(); - } - - DataProvider dbEntity = getDataProvider(entity.getSourceOrganization(), referencedCurie, entity.getCrossReference().getResourceDescriptorPage()); - // we only create new records, no updates - if (dbEntity == null) { - dataProviderDAO.persist(entity); - if (!List.of("FB", "SGD").contains(entity.getSourceOrganization().getAbbreviation())) { - crossReferenceDAO.persistAccessionGeneAssociated(entity.getCrossReference().getId(), geneID); - } - return new ObjectResponse<>(entity); - } - return new ObjectResponse<>(dbEntity); - } - - @NotNull - public static String getFullReferencedCurie(String localReferencedCurie) { - return RESOURCE_DESCRIPTOR_PREFIX + ":" + localReferencedCurie; - } - - private Long getAssociatedGeneId(String fullReferencedCurie, Organization sourceOrganization) { - String orgAbbreviation = sourceOrganization.getAbbreviation(); - if (orgAbbreviation.equals("FB")) { - fullReferencedCurie = orgAbbreviation + ":" + fullReferencedCurie; - } - if (accessionGeneMap.size() == 0) { - if (List.of("FB", "SGD").contains(orgAbbreviation)) { - Map map = new HashMap<>(); - map.put("displayName", orgAbbreviation); - Species species = speciesDAO.findByParams(map).getSingleResult(); - accessionGeneMap = geneDAO.getAllGeneIdsPerSpecies(species); - } else { - ResourceDescriptorPage page = resourceDescriptorPageService.getPageForResourceDescriptor(RESOURCE_DESCRIPTOR_PREFIX, RESOURCE_DESCRIPTOR_PAGE_NAME); - accessionGeneMap = crossReferenceDAO.getGenesWithCrossRefs(page); - } - } - return accessionGeneMap.get(fullReferencedCurie); - } - - private DataProvider getDataProvider(Organization sourceOrganization, String crossReferenceCurie, ResourceDescriptorPage page) { - if (dataProviderMap.size() > 0) { - return dataProviderMap.get(crossReferenceCurie); - } - populateDataProviderMap(sourceOrganization, page); - return dataProviderMap.get(crossReferenceCurie); - } - - private void populateDataProviderMap(Organization sourceOrganization, ResourceDescriptorPage page) { - List allOrgProvider = dataProviderDAO.getAllDataProvider(sourceOrganization, page); - allOrgProvider.stream() - .filter(dataProvider -> dataProvider.getCrossReference() != null && Objects.equals(dataProvider.getCrossReference().getResourceDescriptorPage().getId(), page.getId())) - .forEach(dataProvider -> { - dataProviderMap.put(dataProvider.getCrossReference().getReferencedCurie(), dataProvider); - }); - } - - public HashMap getDataProviderMap(Organization sourceOrganization, ResourceDescriptorPage page) { - if (dataProviderMap.size() > 0) { - return dataProviderMap; - } - populateDataProviderMap(sourceOrganization, page); - return dataProviderMap; - } - @Transactional public ObjectResponse upsert(DataProvider uiEntity) { ObjectResponse response = dataProviderValidator.validateDataProvider(uiEntity, null, true); diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneService.java index f07daeb96..3a86bae0d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneService.java @@ -159,10 +159,75 @@ public void addGeoXref(String entrezId, BackendBulkDataProvider dataProvider) th } if (allianceGene == null) { - throw new KnownIssueValidationException("crossReferences - referencedCurie: " + ValidationConstants.INVALID_MESSAGE + " (" + "NCBI_Gene:" + entrezId + ")"); + throw new KnownIssueValidationException("crossReferences - referencedCurie: " + ValidationConstants.INVALID_MESSAGE + " (NCBI_Gene:" + entrezId + ")"); } - geneXrefHelper.addGeoCrossReference(allianceGene, "NCBI_Gene:" + entrezId); + allianceGene = geneXrefHelper.addGeoCrossReference(allianceGene, "NCBI_Gene:" + entrezId); + + if (allianceGene == null) { + throw new ObjectValidationException(entrezId, "resourceDescriptorPage: " + ValidationConstants.INVALID_MESSAGE + " (NCBI_Gene:gene/other_expression)"); + } + } + + @Transactional + public void addExpressionAtlasXref(String identifier, BackendBulkDataProvider dataProvider) throws ValidationException { + NCBITaxonTerm taxon = ncbiTaxonTermService.getByCurie(dataProvider.canonicalTaxonCurie).getEntity(); + if (taxon == null) { + throw new ObjectValidationException(identifier, "dataProvider - canonical taxon: " + ValidationConstants.INVALID_MESSAGE + " (" + dataProvider.canonicalTaxonCurie + " not found)"); + } + + + String searchField; + String searchValue; + String referencedCurie; + String resourceDescriptorPrefix; + switch (dataProvider) { + case FB -> { + searchField = "modEntityId"; + searchValue = "FB:" + identifier; + referencedCurie = searchValue; + resourceDescriptorPrefix = "FB"; + } + case SGD -> { + searchField = "geneSymbol.displayText"; + searchValue = identifier; + referencedCurie = "SGD:" + identifier; + resourceDescriptorPrefix = "SGD"; + } + default -> { + searchField = "crossReferences.referencedCurie"; + searchValue = "ENSEMBL:" + identifier; + referencedCurie = searchValue; + resourceDescriptorPrefix = "ENSEMBL"; + } + } + + Gene allianceGene = null; + SearchResponse searchResponse = findByField(searchField, searchValue); + if (searchResponse != null) { + // Need to check that returned gene belongs to MOD corresponding to taxon + for (Gene searchResult : searchResponse.getResults()) { + String resultDataProviderCoreGenus = BackendBulkDataProvider.getCoreGenus(searchResult.getDataProvider().getSourceOrganization().getAbbreviation()); + if (taxon.getName().startsWith(resultDataProviderCoreGenus + " ")) { + allianceGene = searchResult; + break; + } + if (StringUtils.equals(taxon.getCurie(), "NCBITaxon:9606") && StringUtils.equals(searchResult.getDataProvider().getSourceOrganization().getAbbreviation(), "RGD")) { + allianceGene = searchResult; + break; + } + } + } + + if (allianceGene == null) { + throw new KnownIssueValidationException(searchField + ": " + ValidationConstants.INVALID_MESSAGE + " (" + searchValue + ")"); + } + + allianceGene = geneXrefHelper.addExpressionAtlasXref(allianceGene, resourceDescriptorPrefix, referencedCurie); + + if (allianceGene == null) { + throw new ObjectValidationException(identifier, "resourceDescriptorPage: " + ValidationConstants.INVALID_MESSAGE + " (" + resourceDescriptorPrefix + ":expression_atlas)"); + } } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GenePhenotypeAnnotationXrefHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GenePhenotypeAnnotationXrefHelper.java index ad34967e7..7d0d37fb0 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GenePhenotypeAnnotationXrefHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GenePhenotypeAnnotationXrefHelper.java @@ -41,7 +41,7 @@ public Gene addGenePhenotypeCrossReference(BackendBulkDataProvider dataProvider, xref.setReferencedCurie(gene.getIdentifier()); xref.setResourceDescriptorPage(rdp); - List updatedXrefs = xrefService.getUpdatedXrefList(List.of(xref), gene.getCrossReferences()); + List updatedXrefs = xrefService.getUpdatedXrefList(List.of(xref), gene.getCrossReferences(), true); if (gene.getCrossReferences() != null) { gene.getCrossReferences().clear(); diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GeneXrefHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GeneXrefHelper.java index 3c2be1601..72fe32138 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GeneXrefHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/crossReferences/GeneXrefHelper.java @@ -1,7 +1,9 @@ package org.alliancegenome.curation_api.services.helpers.crossReferences; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.alliancegenome.curation_api.dao.GeneDAO; import org.alliancegenome.curation_api.model.entities.CrossReference; @@ -22,20 +24,24 @@ public class GeneXrefHelper { @Inject GeneDAO geneDAO; ResourceDescriptorPage ncbiGeneOtherExpressionPage; + Map expressionAtlasPageMap = new HashMap<>(); @Transactional - public void addGeoCrossReference(Gene gene, String entrezCurie) { + public Gene addGeoCrossReference(Gene gene, String entrezCurie) { CrossReference xref = new CrossReference(); if (ncbiGeneOtherExpressionPage == null) { ncbiGeneOtherExpressionPage = rdpService.getPageForResourceDescriptor("NCBI_Gene", "gene/other_expression"); + if (ncbiGeneOtherExpressionPage == null) { + return null; + } } xref.setDisplayName("GEO"); xref.setReferencedCurie(entrezCurie); xref.setResourceDescriptorPage(ncbiGeneOtherExpressionPage); - List updatedXrefs = xrefService.getUpdatedXrefList(List.of(xref), gene.getCrossReferences()); + List updatedXrefs = xrefService.getUpdatedXrefList(List.of(xref), gene.getCrossReferences(), true); if (gene.getCrossReferences() != null) { gene.getCrossReferences().clear(); @@ -47,7 +53,39 @@ public void addGeoCrossReference(Gene gene, String entrezCurie) { gene.getCrossReferences().addAll(updatedXrefs); } - geneDAO.persist(gene); + return geneDAO.persist(gene); } + + + @Transactional + public Gene addExpressionAtlasXref(Gene gene, String resourceDescriptorPrefix, String referencedCurie) { + + CrossReference xref = new CrossReference(); + + if (!expressionAtlasPageMap.containsKey(resourceDescriptorPrefix)) { + ResourceDescriptorPage rdp = rdpService.getPageForResourceDescriptor(resourceDescriptorPrefix, "expression_atlas"); + if (rdp == null) { + return null; + } + expressionAtlasPageMap.put(resourceDescriptorPrefix, rdp); + } + xref.setDisplayName("Expression Atlas"); + xref.setReferencedCurie(referencedCurie); + xref.setResourceDescriptorPage(expressionAtlasPageMap.get(resourceDescriptorPrefix)); + + List updatedXrefs = xrefService.getUpdatedXrefList(List.of(xref), gene.getCrossReferences(), true); + + if (gene.getCrossReferences() != null) { + gene.getCrossReferences().clear(); + } + if (updatedXrefs != null) { + if (gene.getCrossReferences() == null) { + gene.setCrossReferences(new ArrayList<>()); + } + gene.getCrossReferences().addAll(updatedXrefs); + } + + return geneDAO.persist(gene); + } } diff --git a/src/main/resources/db/migration/v0.38.0.17__cleanup_expression_atlas_dataproviders.sql b/src/main/resources/db/migration/v0.38.0.17__cleanup_expression_atlas_dataproviders.sql new file mode 100644 index 000000000..e372cc9a7 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.17__cleanup_expression_atlas_dataproviders.sql @@ -0,0 +1,17 @@ +DELETE FROM dataprovider WHERE crossreference_id IN ( + SELECT id FROM crossreference WHERE resourcedescriptorpage_id = ( + SELECT id FROM resourcedescriptorpage WHERE name = 'expression_atlas' + ) +); + +DELETE FROM genomicentity_crossreference WHERE crossreferences_id IN ( + SELECT id FROM crossreference WHERE resourcedescriptorpage_id = ( + SELECT id FROM resourcedescriptorpage WHERE name = 'expression_atlas' + ) +); + +DELETE FROM crossreference WHERE resourcedescriptorpage_id = ( + SELECT id FROM resourcedescriptorpage WHERE name = 'expression_atlas' +); + +DELETE FROM organization WHERE abbreviation = 'HUMAN';