From b15c3972f75aa278451fea6db6ced4cd1d203f36 Mon Sep 17 00:00:00 2001
From: Olin Blodgett
Date: Wed, 28 Aug 2024 18:14:51 -0600
Subject: [PATCH 01/19] Initial Code for removing dups

---
 .../curation_api/dao/DataProviderDAO.java     | 42 +++++++++++++++++
 .../curation_api/dao/OrganizationDAO.java     | 21 +++++++++
 .../curation_api/dao/base/BaseSQLDAO.java     |  9 +---
 .../jobs/executors/Gff3Executor.java          |  2 +
 .../curation_api/main/Main.java               |  6 ---
 .../model/entities/AssemblyComponent.java     |  3 +-
 .../model/entities/CodingSequence.java        |  3 +-
 .../curation_api/model/entities/Exon.java     |  3 +-
 .../model/entities/Organization.java          |  1 +
 .../model/entities/Transcript.java            |  3 +-
 .../services/AssemblyComponentService.java    |  2 +-
 .../services/DataProviderService.java         | 45 +++++--------------
 .../curation_api/services/Gff3Service.java    |  2 +-
 .../DiseaseAnnotationValidator.java           |  2 +-
 .../validation/dto/Gff3DtoValidator.java      |  2 +-
 ...neExpressionAnnotationFmsDTOValidator.java | 35 ++++++++++-----
 .../PhenotypeAnnotationFmsDTOValidator.java   |  2 +-
 ...quenceTargetingReagentFmsDTOValidator.java |  2 +-
 .../v0.37.0.10__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.11__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.12__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.13__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.14__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.15__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.16__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.17__remove_dataprovider_dups.sql  | 21 +++++++++
 .../v0.37.0.18__remove_dataprovider_dups.sql  | 18 ++++++++
 .../v0.37.0.19__remove_dataprovider_dups.sql  | 19 ++++++++
 .../v0.37.0.20__remove_dataprovider_dups.sql  | 19 ++++++++
 .../v0.37.0.21__remove_dataprovider_dups.sql  | 19 ++++++++
 .../v0.37.0.22__remove_dataprovider_dups.sql  | 19 ++++++++
 .../v0.37.0.23__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.24__remove_dataprovider_dups.sql  | 25 +++++++++++
 .../v0.37.0.25__remove_dataprovider_dups.sql  | 22 +++++++++
 .../v0.37.0.26__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.27__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.28__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.29__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.30__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.31__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.32__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.33__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.34__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.35__remove_dataprovider_dups.sql  |  3 ++
 .../v0.37.0.36__remove_dataprovider_dups.sql  | 22 +++++++++
 .../v0.37.0.37__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.38__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.39__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.40__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.41__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.42__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.43__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.44__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.45__remove_dataprovider_dups.sql  | 20 +++++++++
 .../v0.37.0.46__remove_dataprovider_dups.sql  |  5 +++
 .../v0.37.0.7__remove_dataprovider_dups.sql   | 11 +++++
 .../v0.37.0.8__remove_dataprovider_dups.sql   | 20 +++++++++
 .../v0.37.0.9__remove_dataprovider_dups.sql   | 21 +++++++++
 58 files changed, 884 insertions(+), 71 deletions(-)
 create mode 100644 src/main/resources/db/migration/v0.37.0.10__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.11__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.12__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.13__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.14__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.15__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.16__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.17__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.18__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.19__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.20__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.21__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.22__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.23__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.25__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.26__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.27__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.28__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.29__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.30__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.31__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.32__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.33__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.34__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.35__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.36__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.37__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.38__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.39__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.40__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.41__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.42__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.43__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.44__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.45__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql
 create mode 100644 src/main/resources/db/migration/v0.37.0.9__remove_dataprovider_dups.sql

diff --git a/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java
index 420710b65..dcd7a918b 100644
--- a/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java
+++ b/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java
@@ -1,14 +1,56 @@
 package org.alliancegenome.curation_api.dao;
 
+import java.util.HashMap;
+
 import org.alliancegenome.curation_api.dao.base.BaseSQLDAO;
+import org.alliancegenome.curation_api.model.entities.CrossReference;
 import org.alliancegenome.curation_api.model.entities.DataProvider;
+import org.alliancegenome.curation_api.model.entities.Organization;
+import org.alliancegenome.curation_api.response.SearchResponse;
 
 import jakarta.enterprise.context.ApplicationScoped;
+import jakarta.inject.Inject;
+import jakarta.transaction.Transactional;
 
 @ApplicationScoped
 public class DataProviderDAO extends BaseSQLDAO<DataProvider> {
 
+    @Inject CrossReferenceDAO crossReferenceDAO;
+
+    private HashMap<String, DataProvider> dataProviderCache = new HashMap<>();
+
     protected DataProviderDAO() {
         super(DataProvider.class);
     }
+
+    @Transactional
+    public DataProvider getOrCreateDataProvider(Organization sourceOrganization) {
+
+        if (dataProviderCache.containsKey(sourceOrganization.getAbbreviation())) {
+            return dataProviderCache.get(sourceOrganization.getAbbreviation());
+        }
+
+        SearchResponse<DataProvider> orgResponse = findByField("sourceOrganization.abbreviation", sourceOrganization.getAbbreviation());
+        if (orgResponse != null) {
+            DataProvider member = orgResponse.getSingleResult();
+            if (member != null && member.getSourceOrganization() != null) {
+                dataProviderCache.put(sourceOrganization.getAbbreviation(), member);
+                return member;
+            }
+        }
+
+        DataProvider dataProvider = new DataProvider();
+
+        dataProvider.setSourceOrganization(sourceOrganization);
+
+        CrossReference xref = new CrossReference();
+        xref.setDisplayName(sourceOrganization.getAbbreviation());
+        xref.setReferencedCurie(sourceOrganization.getAbbreviation());
+        xref.setResourceDescriptorPage(sourceOrganization.getHomepageResourceDescriptorPage());
+        dataProvider.setCrossReference(crossReferenceDAO.persist(xref));
+
+        dataProvider = persist(dataProvider);
+        dataProviderCache.put(sourceOrganization.getAbbreviation(), dataProvider);
+        return dataProvider;
+    }
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java
index 18fbb5f0a..37d33336e 100644
--- a/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java
+++ b/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java
@@ -1,15 +1,36 @@
 package org.alliancegenome.curation_api.dao;
 
+import java.util.HashMap;
+
 import org.alliancegenome.curation_api.dao.base.BaseSQLDAO;
 import org.alliancegenome.curation_api.model.entities.Organization;
+import org.alliancegenome.curation_api.response.SearchResponse;
 
 import jakarta.enterprise.context.ApplicationScoped;
 
 @ApplicationScoped
 public class OrganizationDAO extends BaseSQLDAO<Organization> {
 
+    private HashMap<String, Organization> organizationCache = new HashMap<>();
+
     protected OrganizationDAO() {
         super(Organization.class);
     }
 
+    public Organization getOrCreateOrganization(String abbreviation) {
+        if (organizationCache.containsKey(abbreviation)) {
+            return organizationCache.get(abbreviation);
+        }
+        HashMap<String, Object> params = new HashMap<>();
+        params.put("abbreviation", abbreviation);
+        SearchResponse<Organization> resp = findByParams(params);
+        Organization org;
+        if (resp != null && resp.getSingleResult() != null) {
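+            // Reuse the Organization already in the database (and cache it below) rather than persisting a duplicate.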
+            org = resp.getSingleResult();
+        } else {
+            org = new Organization();
+            org.setAbbreviation(abbreviation);
+            org = persist(org);
+        }
+        organizationCache.put(abbreviation, org);
+        return org;
+    }
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/dao/base/BaseSQLDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/base/BaseSQLDAO.java
index 94bf5a5a1..50869c7d4 100644
--- a/src/main/java/org/alliancegenome/curation_api/dao/base/BaseSQLDAO.java
+++ b/src/main/java/org/alliancegenome/curation_api/dao/base/BaseSQLDAO.java
@@ -17,7 +17,6 @@
 import org.alliancegenome.curation_api.response.SearchResponse;
 import org.alliancegenome.curation_api.services.processing.IndexProcessDisplayService;
 import org.alliancegenome.curation_api.util.ProcessDisplayHelper;
-import org.eclipse.microprofile.config.inject.ConfigProperty;
 import org.elasticsearch.index.query.Operator;
 import org.hibernate.exception.ConstraintViolationException;
 import org.hibernate.query.sqm.internal.QuerySqmImpl;
@@ -58,10 +57,6 @@ public class BaseSQLDAO<E extends AuditedObject> extends BaseEntityDAO<E> {
 
-    @ConfigProperty(name = "quarkus.hibernate-search-orm.elasticsearch.hosts") String esHosts;
-
-    @ConfigProperty(name = "quarkus.hibernate-search-orm.elasticsearch.protocol") String esProtocol;
-
     @Inject protected EntityManager entityManager;
     @Inject protected SearchSession searchSession;
     @Inject protected IndexProcessDisplayService indexProcessDisplayService;
@@ -361,7 +356,7 @@ public void reindexEverything(Integer batchSizeToLoadObjects, Integer idFetchSiz
 
         Reflections reflections = new Reflections("org.alliancegenome.curation_api");
         Set<Class<?>> annotatedClasses = reflections.get(TypesAnnotated.with(Indexed.class).asClass(reflections.getConfiguration().getClassLoaders()));
-        ProcessDisplayHelper ph = new ProcessDisplayHelper(2000);
+        ProcessDisplayHelper ph = new ProcessDisplayHelper(5000);
         ph.addDisplayHandler(indexProcessDisplayService);
         ph.startProcess("Mass Index Everything");
         MassIndexer indexer = searchSession.massIndexer(annotatedClasses).batchSizeToLoadObjects(batchSizeToLoadObjects).idFetchSize(idFetchSize).dropAndCreateSchemaOnStart(true).mergeSegmentsOnFinish(false).typesToIndexInParallel(typesToIndexInParallel).threadsToLoadObjects(threadsToLoadObjects)
@@ -402,7 +397,7 @@ public void reindex(Class objectClass, Intege
         MassIndexer indexer = searchSession.massIndexer(objectClass).batchSizeToLoadObjects(batchSizeToLoadObjects).idFetchSize(idFetchSize).dropAndCreateSchemaOnStart(true).mergeSegmentsOnFinish(false).typesToIndexInParallel(typesToIndexInParallel).threadsToLoadObjects(threadsToLoadObjects)
             .monitor(new MassIndexingMonitor() {
-                ProcessDisplayHelper ph = new ProcessDisplayHelper(2000);
+                ProcessDisplayHelper ph = new ProcessDisplayHelper(5000);
 
                 @Override
                 public void documentsAdded(long increment) {
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java
index 9ba729cca..da955293f 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java
@@ -75,6 +75,8 @@ public void execLoad(BulkLoadFile bulkLoadFile) {
 
         List>> preProcessedGffData = preProcessGffData(gffData, dataProvider);
 
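+        // preProcessGffData() has copied everything the load needs, so the raw GFF rows can be released to save memory.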
+        gffData.clear();
+
         Map> idsAdded = createIdsAddedMap();
         Map> previousIds = getPreviouslyLoadedIds(dataProvider);
diff --git a/src/main/java/org/alliancegenome/curation_api/main/Main.java b/src/main/java/org/alliancegenome/curation_api/main/Main.java
index e3da100a3..7b1c960ee 100644
--- a/src/main/java/org/alliancegenome/curation_api/main/Main.java
+++ b/src/main/java/org/alliancegenome/curation_api/main/Main.java
@@ -6,14 +6,8 @@
 
 @QuarkusMain
 public class Main {
-
-    private Main() {
-        // Hidden from view, as it is a utility class
-    }
-
     public static void main(String[] args) {
         Log.info("Running main method of quarkus");
         Quarkus.run(args);
     }
-
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/AssemblyComponent.java b/src/main/java/org/alliancegenome/curation_api/model/entities/AssemblyComponent.java
index 862c73da6..cbf53c0fd 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/AssemblyComponent.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/AssemblyComponent.java
@@ -5,7 +5,6 @@
 import org.alliancegenome.curation_api.view.View;
 import org.eclipse.microprofile.openapi.annotations.media.Schema;
 import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate;
-import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency;
 
@@ -19,7 +18,7 @@
 import lombok.EqualsAndHashCode;
 import lombok.ToString;
 
-@Indexed
+//@Indexed
 @Entity
 @Data
 @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/CodingSequence.java b/src/main/java/org/alliancegenome/curation_api/model/entities/CodingSequence.java
index 6f943b836..c223316d6 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/CodingSequence.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/CodingSequence.java
@@ -14,7 +14,6 @@
 import org.hibernate.search.engine.backend.types.Sortable;
 import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField;
-import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField;
@@ -31,7 +30,7 @@
 import lombok.EqualsAndHashCode;
 import lombok.ToString;
 
-@Indexed
+//@Indexed
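+// Without @Indexed these GFF-derived entities are skipped by the Hibernate Search mass indexer,
+// which discovers its targets by reflecting on @Indexed classes (see BaseSQLDAO.reindexEverything).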
 @Entity
 @Data
 @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Exon.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Exon.java
index 79ea67850..784485a45 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/Exon.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Exon.java
@@ -14,7 +14,6 @@
 import org.hibernate.search.engine.backend.types.Sortable;
 import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField;
-import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField;
@@ -31,7 +30,7 @@
 import lombok.EqualsAndHashCode;
 import lombok.ToString;
 
-@Indexed
+//@Indexed
 @Entity
 @Data
 @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Organization.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Organization.java
index 8520a597b..6ed8573a7 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/Organization.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Organization.java
@@ -33,6 +33,7 @@
 @AGRCurationSchemaVersion(min = "1.4.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { Agent.class })
 @Table(indexes = {
     @Index(name = "organization_homepageresourcedescriptorpage_id_index", columnList = "homepageresourcedescriptorpage_id"),
+    @Index(name = "organization_abbreviation_index", columnList = "abbreviation"),
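+    // backs the by-abbreviation lookups that OrganizationDAO.getOrCreateOrganization now issues on every load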
     @Index(name = "organization_createdby_index", columnList = "createdBy_id"),
     @Index(name = "organization_updatedby_index", columnList = "updatedBy_id")
 })
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java
index 7480f83ce..3ef380bde 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java
@@ -12,7 +12,6 @@
 import org.alliancegenome.curation_api.view.View;
 import org.eclipse.microprofile.openapi.annotations.media.Schema;
 import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate;
-import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded;
 import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency;
 
@@ -26,7 +25,7 @@
 import lombok.EqualsAndHashCode;
 import lombok.ToString;
 
-@Indexed
+//@Indexed
 @Entity
 @Data
 @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true)
diff --git a/src/main/java/org/alliancegenome/curation_api/services/AssemblyComponentService.java b/src/main/java/org/alliancegenome/curation_api/services/AssemblyComponentService.java
index 50d797d3c..b5065e08d 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/AssemblyComponentService.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/AssemblyComponentService.java
@@ -77,7 +77,7 @@ private AssemblyComponent findAssemblyComponentOrCreateDB(String name, String as
             assemblyComponent.setGenomeAssembly(assemblyResponse.getSingleResult());
         }
         assemblyComponent.setTaxon(ncbiTaxonTermService.getByCurie(taxonCurie).getEntity());
-        assemblyComponent.setDataProvider(dataProviderService.createOrganizationDataProvider(dataProviderAbbreviation));
+        assemblyComponent.setDataProvider(dataProviderService.getDefaultDataProvider(dataProviderAbbreviation));
         return assemblyComponentDAO.persist(assemblyComponent);
     }
diff --git a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java
index cb0807349..9796e9092 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java
@@ -1,16 +1,12 @@
 package org.alliancegenome.curation_api.services;
 
 import org.alliancegenome.curation_api.auth.AuthenticatedUser;
-import org.alliancegenome.curation_api.dao.CrossReferenceDAO;
 import org.alliancegenome.curation_api.dao.DataProviderDAO;
 import org.alliancegenome.curation_api.dao.OrganizationDAO;
 import org.alliancegenome.curation_api.model.entities.AllianceMember;
-import org.alliancegenome.curation_api.model.entities.CrossReference;
 import org.alliancegenome.curation_api.model.entities.DataProvider;
-import org.alliancegenome.curation_api.model.entities.Organization;
 import org.alliancegenome.curation_api.model.entities.Person;
 import org.alliancegenome.curation_api.response.ObjectResponse;
-import org.alliancegenome.curation_api.response.SearchResponse;
 import org.alliancegenome.curation_api.services.base.BaseEntityCrudService;
 import org.alliancegenome.curation_api.services.validation.DataProviderValidator;
 
@@ -28,7 +24,6 @@ public class DataProviderService extends BaseEntityCrudService<DataProvider, DataProviderDAO> {
 
     @Inject @AuthenticatedUser protected Person authenticatedPerson;
     @Inject DataProviderDAO dataProviderDAO;
-    @Inject CrossReferenceDAO crossReferenceDAO;
     @Inject OrganizationDAO organizationDAO;
     @Inject DataProviderValidator dataProviderValidator;
 
@@ -41,29 +36,10 @@ public class DataProviderService extends BaseEntityCrudService<DataProvider, DataProviderDAO> {
-    public DataProvider createOrganizationDataProvider(String organizationAbbreviation) {
-        SearchResponse<Organization> orgResponse = organizationDAO.findByField("abbreviation", organizationAbbreviation);
-        if (orgResponse == null || orgResponse.getSingleResult() == null) {
-            return null;
-        }
-        Organization member = orgResponse.getSingleResult();
-
-        return createDataProvider(member);
+    public DataProvider getAllianceDataProvider() {
+        return getDefaultDataProvider("Alliance");
     }
-
-    public DataProvider createAllianceDataProvider() {
-        return createOrganizationDataProvider("Alliance");
-    }
-
-    private DataProvider createDataProvider(Organization member) {
-        DataProvider dataProvider = new DataProvider();
-
-        dataProvider.setSourceOrganization(member);
-
-        CrossReference xref = new CrossReference();
-        xref.setDisplayName(member.getAbbreviation());
-        xref.setReferencedCurie(member.getAbbreviation());
-        xref.setResourceDescriptorPage(member.getHomepageResourceDescriptorPage());
-        dataProvider.setCrossReference(crossReferenceDAO.persist(xref));
-
-        return dataProviderDAO.persist(dataProvider);
+
+    @Transactional
+    public DataProvider getDefaultDataProvider(String sourceOrganizationAbbreviation) {
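+        // Both getOrCreate* calls are cache-backed, so a bulk load resolves each organization's DataProvider once.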
+        return dataProviderDAO.getOrCreateDataProvider(organizationDAO.getOrCreateOrganization(sourceOrganizationAbbreviation));
     }
 
     @Transactional
@@ -87,4 +62,6 @@ public ObjectResponse<DataProvider> upsert(DataProvider uiEntity) {
     public ObjectResponse<DataProvider> validate(DataProvider uiEntity) {
         return dataProviderValidator.validateDataProvider(uiEntity, null, true);
     }
+
+
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java
index 99500c959..307dffc75 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java
@@ -80,7 +80,7 @@ public String loadGenomeAssembly(String assemblyName, List gffHeaderData
         if (resp == null || resp.getSingleResult() == null) {
             GenomeAssembly assembly = new GenomeAssembly();
             assembly.setModEntityId(assemblyName);
-            assembly.setDataProvider(dataProviderService.createOrganizationDataProvider(dataProvider.sourceOrganization));
+            assembly.setDataProvider(dataProviderService.getDefaultDataProvider(dataProvider.sourceOrganization));
             assembly.setTaxon(ncbiTaxonTermService.getByCurie(dataProvider.canonicalTaxonCurie).getEntity());
 
             genomeAssemblyDAO.persist(assembly);
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/DiseaseAnnotationValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/DiseaseAnnotationValidator.java
index ee929311d..d0fdbb4da 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/DiseaseAnnotationValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/DiseaseAnnotationValidator.java
@@ -150,7 +150,7 @@ public DataProvider validateSecondaryDataProvider(DiseaseAnnotation uiEntity, Di
 
         if (uiEntity.getSecondaryDataProvider() == null) {
             if (dbEntity.getId() == null) {
-                uiEntity.setSecondaryDataProvider(dataProviderService.createAllianceDataProvider());
+                uiEntity.setSecondaryDataProvider(dataProviderService.getAllianceDataProvider());
                 if (uiEntity.getSecondaryDataProvider() == null) {
                     return null;
                 }
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java
index 08a9496aa..87f0ebcfa 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java
@@ -163,7 +163,7 @@ public Transcript validateTranscriptEntry(Gff3DTO dto, Map attri
 
     private ObjectResponse<E> validateGffEntity(E entity, Gff3DTO dto, Map<String, String> attributes, BackendBulkDataProvider dataProvider) {
         ObjectResponse<E> geResponse = new ObjectResponse<>();
 
-        entity.setDataProvider(dataProviderService.createOrganizationDataProvider(dataProvider.sourceOrganization));
+        entity.setDataProvider(dataProviderService.getDefaultDataProvider(dataProvider.sourceOrganization));
         entity.setTaxon(ncbiTaxonTermService.getByCurie(dataProvider.canonicalTaxonCurie).getEntity());
 
         geResponse.setEntity(entity);
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java
index fc3d9fe02..5467b1eb3 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java
@@ -1,15 +1,28 @@
 package org.alliancegenome.curation_api.services.validation.dto.fms;
 
-import jakarta.enterprise.context.RequestScoped;
-import jakarta.inject.Inject;
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeParseException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.alliancegenome.curation_api.constants.ValidationConstants;
 import org.alliancegenome.curation_api.constants.VocabularyConstants;
 import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
 import org.alliancegenome.curation_api.exceptions.ObjectUpdateException;
 import org.alliancegenome.curation_api.exceptions.ObjectValidationException;
-import org.alliancegenome.curation_api.model.entities.*;
-import org.alliancegenome.curation_api.model.entities.ontology.*;
+import org.alliancegenome.curation_api.model.entities.AnatomicalSite;
+import org.alliancegenome.curation_api.model.entities.ExpressionPattern;
+import org.alliancegenome.curation_api.model.entities.Gene;
+import org.alliancegenome.curation_api.model.entities.GeneExpressionAnnotation;
+import org.alliancegenome.curation_api.model.entities.Reference;
+import org.alliancegenome.curation_api.model.entities.TemporalContext;
+import org.alliancegenome.curation_api.model.entities.VocabularyTerm;
+import org.alliancegenome.curation_api.model.entities.ontology.AnatomicalTerm;
+import org.alliancegenome.curation_api.model.entities.ontology.GOTerm;
+import org.alliancegenome.curation_api.model.entities.ontology.MMOTerm;
+import org.alliancegenome.curation_api.model.entities.ontology.StageTerm;
+import org.alliancegenome.curation_api.model.entities.ontology.UBERONTerm;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionFmsDTO;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.UberonSlimTermDTO;
 import org.alliancegenome.curation_api.response.ObjectResponse;
@@ -19,14 +32,16 @@
 import org.alliancegenome.curation_api.services.ReferenceService;
 import org.alliancegenome.curation_api.services.VocabularyTermService;
 import org.alliancegenome.curation_api.services.helpers.annotations.GeneExpressionAnnotationUniqueIdHelper;
-import org.alliancegenome.curation_api.services.ontology.*;
+import org.alliancegenome.curation_api.services.ontology.AnatomicalTermService;
+import org.alliancegenome.curation_api.services.ontology.GoTermService;
+import org.alliancegenome.curation_api.services.ontology.MmoTermService;
+import org.alliancegenome.curation_api.services.ontology.StageTermService;
+import org.alliancegenome.curation_api.services.ontology.UberonTermService;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.ObjectUtils;
-import java.time.OffsetDateTime;
-import java.time.format.DateTimeParseException;
-import java.util.ArrayList;
-import java.util.List;
+
+import jakarta.enterprise.context.RequestScoped;
+import jakarta.inject.Inject;
 
 @RequestScoped
 public class GeneExpressionAnnotationFmsDTOValidator {
@@ -121,7 +136,7 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr
             geneExpressionAnnotation.getExpressionPattern().setWhenExpressed(temporalContext);
         }
 
-        geneExpressionAnnotation.setDataProvider(dataProviderService.createOrganizationDataProvider(dataProvider.sourceOrganization));
+        geneExpressionAnnotation.setDataProvider(dataProviderService.getDefaultDataProvider(dataProvider.sourceOrganization));
         geneExpressionAnnotation.setRelation(vocabularyTermService.getTermInVocabulary(VocabularyConstants.GENE_EXPRESSION_VOCABULARY, VocabularyConstants.GENE_EXPRESSION_RELATION_TERM).getEntity());
         geneExpressionAnnotation.setObsolete(false);
         geneExpressionAnnotation.setInternal(false);
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/PhenotypeAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/PhenotypeAnnotationFmsDTOValidator.java
index 8ccc75bb0..ee59dceb0 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/PhenotypeAnnotationFmsDTOValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/PhenotypeAnnotationFmsDTOValidator.java
@@ -88,7 +88,7 @@ public ObjectResponse validatePhenotypeAnnota
             annotation.setConditionRelations(null);
         }
 
-        annotation.setDataProvider(dataProviderService.createOrganizationDataProvider(beDataProvider.sourceOrganization));
+        annotation.setDataProvider(dataProviderService.getDefaultDataProvider(beDataProvider.sourceOrganization));
         annotation.setRelation(vocabularyTermService.getTermInVocabulary(VocabularyConstants.PHENOTYPE_RELATION_VOCABULARY, "has_phenotype").getEntity());
 
         CrossReference evidenceXref = null;
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/SequenceTargetingReagentFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/SequenceTargetingReagentFmsDTOValidator.java
index 55cdec3ba..b73581071 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/SequenceTargetingReagentFmsDTOValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/SequenceTargetingReagentFmsDTOValidator.java
@@ -86,7 +86,7 @@ public SequenceTargetingReagent validateSQTRFmsDTO(SequenceTargetingReagentFmsDT
         }
 
         if (beDataProvider != null) {
-            sqtr.setDataProvider(dataProviderService.createOrganizationDataProvider(beDataProvider.sourceOrganization));
+            sqtr.setDataProvider(dataProviderService.getDefaultDataProvider(beDataProvider.sourceOrganization));
         }
 
         if (sqtrResponse.hasErrors()) {
diff --git a/src/main/resources/db/migration/v0.37.0.10__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.10__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..7d150a49e
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.10__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all BiologicalEntity.dataProvider to point to an MGI dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'MGI'
+      AND cr1.referencedCurie = 'MGI'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'MGI';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.11__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.11__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..4ced7b6c7
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.11__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all BiologicalEntity.dataProvider to point to a WB dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'WB'
+      AND cr1.referencedCurie = 'WB'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'WB';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.12__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.12__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..dbac17613
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.12__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all BiologicalEntity.dataProvider to point to an SGD dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'SGD'
+      AND cr1.referencedCurie = 'SGD'
+    ORDER BY dp1.id ASC LIMIT 1
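+    -- the lowest-id duplicate wins; every row pointing at another SGD dataprovider is repointed to it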
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'SGD';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.13__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.13__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..307820d78
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.13__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all BiologicalEntity.dataProvider to point to an RGD dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'RGD'
+      AND cr1.referencedCurie = 'RGD'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'RGD';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.14__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.14__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..734568644
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.14__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all BiologicalEntity.dataProvider to point to a ZFIN dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'ZFIN'
+      AND cr1.referencedCurie = 'ZFIN'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'ZFIN';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.15__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.15__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..df089c801
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.15__remove_dataprovider_dups.sql
@@ -0,0 +1,20 @@
+-- Update all BiologicalEntity.dataProvider to point to an Alliance dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'Alliance'
+      AND cr1.referencedCurie = 'Alliance'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'Alliance';
+
diff --git a/src/main/resources/db/migration/v0.37.0.16__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.16__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..a2f85ddeb
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.16__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all annotation.dataProvider to point to a ZFIN dataprovider that is first in the DB
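+-- (same lowest-id consolidation as the biologicalentity migrations above, applied to annotation rows)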
+
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'ZFIN'
+      AND cr1.referencedCurie = 'ZFIN'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'ZFIN';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.17__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.17__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..b4c0f063c
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.17__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+
+
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'Alliance'
+      AND cr1.referencedCurie = 'Alliance'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'Alliance';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.18__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.18__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..04c19384b
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.18__remove_dataprovider_dups.sql
@@ -0,0 +1,18 @@
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'XB'
+      AND cr1.referencedCurie = 'XB'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'XB';
+
diff --git a/src/main/resources/db/migration/v0.37.0.19__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.19__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..5564cd288
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.19__remove_dataprovider_dups.sql
@@ -0,0 +1,19 @@
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'MGI'
+      AND cr1.referencedCurie = 'MGI'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'MGI';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.20__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.20__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..0fe17c326
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.20__remove_dataprovider_dups.sql
@@ -0,0 +1,19 @@
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'RGD'
+      AND cr1.referencedCurie = 'RGD'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'RGD';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.21__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.21__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..b48953348
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.21__remove_dataprovider_dups.sql
@@ -0,0 +1,19 @@
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'SGD'
+      AND cr1.referencedCurie = 'SGD'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'SGD';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.22__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.22__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..56fa0cd3f
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.22__remove_dataprovider_dups.sql
@@ -0,0 +1,19 @@
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'FB'
+      AND cr1.referencedCurie = 'FB'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'FB';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.23__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.23__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..6b717c108
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.23__remove_dataprovider_dups.sql
@@ -0,0 +1,20 @@
+
+UPDATE annotation an
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+      AND dp1.crossreference_id = cr1.id
+      AND o1.abbreviation = 'WB'
+      AND cr1.referencedCurie = 'WB'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    an.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'WB';
+
+
diff --git a/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..221275a12
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql
@@ -0,0 +1,25 @@
+CREATE TABLE dataprovider_ids_to_keep (
+    id bigint PRIMARY KEY
+);
+CREATE TABLE crossreference_ids_to_delete (
+    id bigint
+);
+CREATE TABLE dataprovider_ids_to_delete (
+    id bigint
+);
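+-- Staging tables: compute the keep/delete id sets once here, so the following
+-- migrations can batch the deletes without recomputing the anti-join.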
+
+-- select all the dataproviders that we are going to keep
+INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM annotation where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 191431
+INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM biologicalentity where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 6241140
+INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM chromosome where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 0
+INSERT INTO dataprovider_ids_to_keep (id) SELECT secondarydataprovider_id FROM diseaseannotation where secondarydataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 14380
+INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM htpexpressiondatasetannotation where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 0
+INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM reagent where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 226431
+INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM species where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 10
+-- Total 6673392
+
+INSERT INTO crossreference_ids_to_delete (id) select crossreference_id from dataprovider dp left join dataprovider_ids_to_keep dk on dp.id = dk.id where dp.crossreference_id is not null and dk.id is null; -- 42582734
+CREATE INDEX crossreference_ids_to_delete_index ON crossreference_ids_to_delete USING btree (id);
+
+INSERT INTO dataprovider_ids_to_delete (id) select dp.id from dataprovider dp left join dataprovider_ids_to_keep dk on dp.id = dk.id where dk.id is null;
+CREATE INDEX dataprovider_ids_to_delete_index ON dataprovider_ids_to_delete USING btree (id);
diff --git a/src/main/resources/db/migration/v0.37.0.25__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.25__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..ebfd4ed96
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.25__remove_dataprovider_dups.sql
@@ -0,0 +1,22 @@
+-- This is faster than doing the query in one go: each small batch appears to run in memory, whereas one large query operates on disk and is much slower
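+-- Each statement below deletes one 250,000-id slice, addressed by ORDER BY id
+-- plus a moving OFFSET, keeping every individual DELETE small.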
+
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 0) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
diff --git a/src/main/resources/db/migration/v0.37.0.26__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.26__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..4fde4f3c5
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.26__remove_dataprovider_dups.sql
@@ -0,0 +1,20 @@
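+-- continues the batched delete: offsets 5,000,000 through 9,750,000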
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
diff --git a/src/main/resources/db/migration/v0.37.0.27__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.27__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..2be88dac0
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.27__remove_dataprovider_dups.sql
@@ -0,0 +1,20 @@
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
diff --git a/src/main/resources/db/migration/v0.37.0.28__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.28__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..6186a22c7
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.28__remove_dataprovider_dups.sql
@@ -0,0 +1,20 @@
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids);
FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.29__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.29__remove_dataprovider_dups.sql new file mode 100644 index 000000000..c257dafb8 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.29__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM 
dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.30__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.30__remove_dataprovider_dups.sql new file mode 100644 index 000000000..bfb3e0e10 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.30__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27750000) DELETE FROM dataprovider 
WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.31__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.31__remove_dataprovider_dups.sql new file mode 100644 index 000000000..6f301efc3 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.31__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER 
BY id LIMIT 250000 OFFSET 33000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.32__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.32__remove_dataprovider_dups.sql new file mode 100644 index 000000000..6d2e71d86 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.32__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); 
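+-- Every batch statement in these migrations instantiates the same template, deleting one 250,000-row slice of the helper table at a time. Assuming PostgreSQL (which these migrations already target), the full unrolled sequence for this table is equivalent to a PL/pgSQL loop along these lines, shown here only as an illustrative sketch:
+-- DO $$
+-- DECLARE batch_offset bigint := 0;
+-- BEGIN
+--   WHILE batch_offset < 50000000 LOOP
+--     DELETE FROM dataprovider
+--       WHERE id IN (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET batch_offset);
+--     batch_offset := batch_offset + 250000;
+--   END LOOP;
+-- END $$;
+-- The migrations keep the statements unrolled as plain SQL, one small, independent DELETE per slice.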
+WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.33__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.33__remove_dataprovider_dups.sql new file mode 100644 index 000000000..71f2c8077 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.33__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 
43250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.34__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.34__remove_dataprovider_dups.sql new file mode 100644 index 000000000..fed63fae5 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.34__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id 
FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49000000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49250000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49500000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM dataprovider_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49750000) DELETE FROM dataprovider WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.35__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.35__remove_dataprovider_dups.sql new file mode 100644 index 000000000..d4ecb635e --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.35__remove_dataprovider_dups.sql @@ -0,0 +1,3 @@ + +-- Just in case anything was missed, this query should run much faster now that the batched deletes have removed the bulk of the rows +DELETE FROM dataprovider dp USING dataprovider_ids_to_delete cd WHERE dp.id = cd.id; diff --git a/src/main/resources/db/migration/v0.37.0.36__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.36__remove_dataprovider_dups.sql new file mode 100644 index 000000000..571cf2476 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.36__remove_dataprovider_dups.sql @@ -0,0 +1,22 @@ +-- This is faster than doing the query in one go: the smaller batches appear to be processed in memory, whereas one large query operates on disk, which is much slower + +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 0) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 1750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER
BY id LIMIT 250000 OFFSET 2500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 2750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 3750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 4750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.37__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.37__remove_dataprovider_dups.sql new file mode 100644 index 000000000..a67689b29 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.37__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 5750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 6750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7500000) 
DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 7750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 8750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 9750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.38__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.38__remove_dataprovider_dups.sql new file mode 100644 index 000000000..54d68ac7e --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.38__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 10750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 11750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12500000) DELETE FROM 
crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 12750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 13750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 14750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.39__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.39__remove_dataprovider_dups.sql new file mode 100644 index 000000000..70a25d95b --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.39__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 15750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 16750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17500000) DELETE FROM crossreference 
WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 17750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 18750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 19750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.40__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.40__remove_dataprovider_dups.sql new file mode 100644 index 000000000..68859f711 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.40__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 20750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 21750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22500000) DELETE FROM crossreference WHERE id IN 
(SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 22750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 23750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 24750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.41__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.41__remove_dataprovider_dups.sql new file mode 100644 index 000000000..1500ed3d1 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.41__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 25750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 26750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM 
ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 27750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 28750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 29750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.42__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.42__remove_dataprovider_dups.sql new file mode 100644 index 000000000..ff118bb2c --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.42__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 30750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 31750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids 
AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 32750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 33750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 34750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.43__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.43__remove_dataprovider_dups.sql new file mode 100644 index 000000000..c6d2fc4db --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.43__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 35750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 36750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id 
FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 37750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 38750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 39750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.44__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.44__remove_dataprovider_dups.sql new file mode 100644 index 000000000..29c934d3e --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.44__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 40750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 41750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM 
crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 42750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 43750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 44750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.45__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.45__remove_dataprovider_dups.sql new file mode 100644 index 000000000..37dae1a75 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.45__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 45750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 46750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM 
crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 47750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 48750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49000000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49250000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49500000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); +WITH ids AS (SELECT id FROM crossreference_ids_to_delete ORDER BY id LIMIT 250000 OFFSET 49750000) DELETE FROM crossreference WHERE id IN (SELECT id FROM ids); diff --git a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql new file mode 100644 index 000000000..b82cdb6a4 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql @@ -0,0 +1,5 @@ +DELETE FROM crossreference cr USING crossreference_ids_to_delete cd WHERE cr.id = cd.id; + +DROP TABLE dataprovider_ids_to_keep; +DROP TABLE crossreference_ids_to_delete; +DROP TABLE dataprovider_ids_to_delete; diff --git a/src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql new file mode 100644 index 000000000..6bd15b727 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql @@ -0,0 +1,11 @@ +-- Update the null resourcedescriptorpage_id to the XB home page because Xenbase != XB + +update crossreference + set resourcedescriptorpage_id = ( + SELECT rp.id + FROM resourcedescriptorpage rp, resourcedescriptor rd + WHERE rp.resourcedescriptor_id=rd.id and rd.prefix = 'Xenbase' and rp.name = 'homepage' +) +WHERE resourcedescriptorpage_id is null and referencedCurie = 'XB'; + +CREATE INDEX organization_abbreviation_index ON organization USING btree (abbreviation); diff --git a/src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql new file mode 100644 index 000000000..aa005d10c --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql @@ -0,0 +1,20 @@ +-- Update all BiologicalEntity.dataProvider to point to the XB dataprovider that appears first in the DB + +UPDATE biologicalentity be +SET dataprovider_id = ( + SELECT dp1.id + FROM dataprovider dp1, organization o1, crossreference cr1 + WHERE o1.id = dp1.sourceorganization_id + AND dp1.crossreference_id = cr1.id + AND o1.abbreviation = 'XB' + AND cr1.referencedCurie = 'XB' + ORDER BY dp1.id ASC LIMIT 1 +) +FROM dataprovider dp, organization o, crossreference cr +WHERE + be.dataprovider_id=dp.id AND + dp.sourceorganization_id=o.id AND + dp.crossreference_id = cr.id AND + o.abbreviation = cr.referencedCurie AND + o.abbreviation = 'XB'; +
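+-- The XB migration above and the FB migration below differ only in the organization abbreviation. A parameterized sketch of the same repoint step (using a hypothetical :org psql variable, not part of these migrations) would be:
+-- UPDATE biologicalentity be
+-- SET dataprovider_id = (SELECT dp1.id
+--                          FROM dataprovider dp1
+--                          JOIN organization o1 ON o1.id = dp1.sourceorganization_id
+--                          JOIN crossreference cr1 ON cr1.id = dp1.crossreference_id
+--                         WHERE o1.abbreviation = :org AND cr1.referencedCurie = :org
+--                         ORDER BY dp1.id LIMIT 1)
+-- FROM dataprovider dp
+-- JOIN organization o ON o.id = dp.sourceorganization_id
+-- JOIN crossreference cr ON cr.id = dp.crossreference_id
+-- WHERE be.dataprovider_id = dp.id
+--   AND o.abbreviation = cr.referencedCurie
+--   AND o.abbreviation = :org;
+-- Once every referrer points at the first-created dataprovider row, the remaining duplicates are swept up through dataprovider_ids_to_delete.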
diff --git a/src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..6bd15b727
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.7__remove_dataprovider_dups.sql
@@ -0,0 +1,11 @@
+-- Update the null resourcedescriptorpage_id to the XB home page because Xenbase != XB
+
+update crossreference
+    set resourcedescriptorpage_id = (
+        SELECT rp.id
+        FROM resourcedescriptorpage rp, resourcedescriptor rd
+        WHERE rp.resourcedescriptor_id=rd.id and rd.prefix = 'Xenbase' and rp.name = 'homepage'
+)
+WHERE resourcedescriptorpage_id is null and referencedCurie = 'XB';
+
+CREATE INDEX organization_abbreviation_index ON organization USING btree (abbreviation);
diff --git a/src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..aa005d10c
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.8__remove_dataprovider_dups.sql
@@ -0,0 +1,20 @@
+-- Update all BiologicalEntity.dataProvider to point to a XB dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+    AND dp1.crossreference_id = cr1.id
+    AND o1.abbreviation = 'XB'
+    AND cr1.referencedCurie = 'XB'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'XB';
+
diff --git a/src/main/resources/db/migration/v0.37.0.9__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.9__remove_dataprovider_dups.sql
new file mode 100644
index 000000000..47783da19
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.9__remove_dataprovider_dups.sql
@@ -0,0 +1,21 @@
+-- Update all BiologicalEntity.dataProvider to point to a FB dataprovider that is first in the DB
+
+UPDATE biologicalentity be
+SET dataprovider_id = (
+    SELECT dp1.id
+    FROM dataprovider dp1, organization o1, crossreference cr1
+    WHERE o1.id = dp1.sourceorganization_id
+    AND dp1.crossreference_id = cr1.id
+    AND o1.abbreviation = 'FB'
+    AND cr1.referencedCurie = 'FB'
+    ORDER BY dp1.id ASC LIMIT 1
+)
+FROM dataprovider dp, organization o, crossreference cr
+WHERE
+    be.dataprovider_id=dp.id AND
+    dp.sourceorganization_id=o.id AND
+    dp.crossreference_id = cr.id AND
+    o.abbreviation = cr.referencedCurie AND
+    o.abbreviation = 'FB';
+
+
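These per-organization migrations (v0.37.0.8 for XB, v0.37.0.9 for FB, and so on through the series) all repoint biologicalentity.dataprovider_id at one canonical dataprovider row, the lowest id whose organization abbreviation matches its crossreference curie, before the duplicate rows are deleted. A sketch of the same step as one parameterized JDBC call rather than one file per organization; the SQL mirrors the migrations above, while the method and class names are this sketch's own.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

public class DataProviderCanonicalizer {
    // Repoints biologicalentity.dataprovider_id to the lowest-id dataprovider for one org.
    static int canonicalize(Connection con, String abbreviation) throws SQLException {
        String sql = """
            UPDATE biologicalentity be
            SET dataprovider_id = (
                SELECT dp1.id
                FROM dataprovider dp1, organization o1, crossreference cr1
                WHERE o1.id = dp1.sourceorganization_id
                AND dp1.crossreference_id = cr1.id
                AND o1.abbreviation = ? AND cr1.referencedCurie = ?
                ORDER BY dp1.id ASC LIMIT 1
            )
            FROM dataprovider dp, organization o, crossreference cr
            WHERE be.dataprovider_id = dp.id
            AND dp.sourceorganization_id = o.id
            AND dp.crossreference_id = cr.id
            AND o.abbreviation = cr.referencedCurie
            AND o.abbreviation = ?
            """;
        try (PreparedStatement ps = con.prepareStatement(sql)) {
            ps.setString(1, abbreviation);
            ps.setString(2, abbreviation);
            ps.setString(3, abbreviation);
            return ps.executeUpdate(); // number of entities repointed
        }
    }
}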
From d670d07ee2102e5d071a39f30ba3426469265375 Mon Sep 17 00:00:00 2001
From: Olin Blodgett
Date: Wed, 28 Aug 2024 19:01:53 -0600
Subject: [PATCH 02/19] Fixed migration file

---
 .../db/migration/v0.37.0.24__remove_dataprovider_dups.sql | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql
index 221275a12..943628a8f 100644
--- a/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql
+++ b/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql
@@ -13,7 +13,6 @@ INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM annotation
 INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM biologicalentity where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 6241140
 INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM chromosome where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 0
 INSERT INTO dataprovider_ids_to_keep (id) SELECT secondarydataprovider_id FROM diseaseannotation where secondarydataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 14380
-INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM htpexpressiondatasetannotation where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 0
 INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM reagent where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 226431
 INSERT INTO dataprovider_ids_to_keep (id) SELECT dataprovider_id FROM species where dataprovider_id is not null ON CONFLICT (id) DO NOTHING; -- 10
 -- Total 6673392

From b043391685b7b2452e79dd45d53ac7b0f17982fa Mon Sep 17 00:00:00 2001
From: Olin Blodgett
Date: Wed, 28 Aug 2024 20:36:36 -0600
Subject: [PATCH 03/19] Added missing indexes and updated dao's to use cache

---
 .../curation_api/dao/DataProviderDAO.java              |  8 +++++++-
 .../curation_api/dao/OrganizationDAO.java              |  2 ++
 .../curation_api/jobs/executors/Gff3Executor.java      | 11 +++++++----
 .../curation_api/model/entities/CodingSequence.java    |  5 ++++-
 .../curation_api/model/entities/Exon.java              |  5 ++++-
 .../curation_api/model/entities/Transcript.java        |  5 +++++
 .../model/entities/ontology/OntologyTerm.java          |  1 +
 .../migration/v0.37.0.7__remove_dataprovider_dups.sql  |  5 +++++
 8 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java
index dcd7a918b..22ed12adf 100644
--- a/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java
+++ b/src/main/java/org/alliancegenome/curation_api/dao/DataProviderDAO.java
@@ -29,11 +29,16 @@ public DataProvider getOrCreateDataProvider(Organization sourceOrganization) {
     if (dataProviderCache.containsKey(sourceOrganization.getAbbreviation())) {
         return dataProviderCache.get(sourceOrganization.getAbbreviation());
     }
+
+    HashMap<String, Object> params = new HashMap<>();
+    params.put("sourceOrganization.abbreviation", sourceOrganization.getAbbreviation());
+    params.put("crossReference.referencedCurie", sourceOrganization.getAbbreviation());
 
-    SearchResponse<DataProvider> orgResponse = findByField("sourceOrganization.abbreviation", sourceOrganization.getAbbreviation());
+    SearchResponse<DataProvider> orgResponse = findByParams(params);
     if (orgResponse != null) {
         DataProvider member = orgResponse.getSingleResult();
         if (member != null && member.getSourceOrganization() != null) {
+            dataProviderCache.put(sourceOrganization.getAbbreviation(), member);
             return member;
         }
     } else {
@@ -47,6 +52,7 @@ public DataProvider getOrCreateDataProvider(Organization sourceOrganization) {
     xref.setResourceDescriptorPage(sourceOrganization.getHomepageResourceDescriptorPage());
     dataProvider.setCrossReference(crossReferenceDAO.persist(xref));
 
+    dataProviderCache.put(sourceOrganization.getAbbreviation(), dataProvider);
     return persist(dataProvider);
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java
index 37d33336e..c28fafc7f 100644
--- a/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java
+++ b/src/main/java/org/alliancegenome/curation_api/dao/OrganizationDAO.java
@@ -25,10 +25,12 @@ public Organization getOrCreateOrganization(String abbreviation) {
     params.put("abbreviation", abbreviation);
     SearchResponse<Organization> resp = findByParams(params);
     if (resp != null) {
+        organizationCache.put(abbreviation, resp.getSingleResult());
        return resp.getSingleResult();
     } else {
        Organization o = new Organization();
        o.setAbbreviation(abbreviation);
+        organizationCache.put(abbreviation, o);
        return persist(o);
     }
 }
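The DAO changes above add a read-through cache keyed by organization abbreviation, and tighten the lookup to match on both sourceOrganization.abbreviation and crossReference.referencedCurie so only the canonical row is returned. A minimal, self-contained rendering of the caching idea follows; the real DAOs use a plain map field populated by hand, and ConcurrentHashMap.computeIfAbsent is this sketch's choice, not the patch's.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

class ReadThroughCache<K, V> {
    private final Map<K, V> cache = new ConcurrentHashMap<>();
    private final Function<K, V> loader; // e.g. a find-or-persist lookup against the database

    ReadThroughCache(Function<K, V> loader) {
        this.loader = loader;
    }

    V get(K key) {
        // Loads once per key; subsequent calls for the same key skip the database entirely.
        return cache.computeIfAbsent(key, loader);
    }
}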
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java
index da955293f..0b6f1efb1 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java
@@ -188,8 +189,9 @@ private String loadGenomeAssembly(String assemblyName, BulkLoadFileHistory histo
     return assemblyName;
 }
 
-private Map<String, List<Long>> loadEntities(BulkLoadFileHistory history, List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData, Map<String, List<Long>> idsAdded,
-    BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) {
+private Map<String, List<Long>> loadEntities(BulkLoadFileHistory history, List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData, Map<String, List<Long>> idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) {
+    int updateThreshhold = 500; // Aim for every 5 seconds (r/s * 5 = this number)
+    int updateCounter = 0;
     for (ImmutablePair<Gff3DTO, Map<String, String>> gff3EntryPair : gffData) {
        try {
            idsAdded = gff3Service.loadEntity(history, gff3EntryPair, idsAdded, dataProvider);
@@ -202,10 +203,12 @@ private Map<String, List<Long>> loadEntities(BulkLoadFileHistory history, List

From: Olin Blodgett
Date: Thu, 29 Aug 2024 11:58:08 -0600
Subject: [PATCH 04/19] Initial code to restructure bulk load file history

---
 .../cliapp/src/service/DataLoadService.js          |   3 +
 .../crud/CodingSequenceCrudController.java         |   7 +-
 .../controllers/crud/ExonCrudController.java       |   6 +-
 .../crud/TranscriptCrudController.java             |   7 +-
 .../enums/BackendBulkLoadType.java                 |  18 +-
 .../curation_api/jobs/JobScheduler.java            |  44 +-
 ...JobEvent.java => PendingLoadJobEvent.java}      |   2 +-
 ...JobEvent.java => StartedLoadJobEvent.java}      |   2 +-
 .../AgmDiseaseAnnotationExecutor.java              |  24 +-
 .../jobs/executors/AgmExecutor.java                |  24 +-
 .../AlleleDiseaseAnnotationExecutor.java           |  25 +-
 .../jobs/executors/AlleleExecutor.java             |  24 +-
 .../jobs/executors/BulkLoadJobExecutor.java        | 108 +++--
 .../jobs/executors/ConstructExecutor.java          |  25 +-
 .../GeneDiseaseAnnotationExecutor.java             |  24 +-
 .../jobs/executors/GeneExecutor.java               |  24 +-
 .../executors/GeneExpressionExecutor.java          |  42 +-
 .../GeneGeneticInteractionExecutor.java            |  18 +-
 .../GeneMolecularInteractionExecutor.java          |  19 +-
 .../jobs/executors/Gff3CDSExecutor.java            | 130 +++++
 .../jobs/executors/Gff3Executor.java               | 446 +++++++++---------
 .../jobs/executors/Gff3ExonExecutor.java           | 130 +++++
 .../executors/Gff3TranscriptExecutor.java          | 125 +++++
 ...TPExpressionDatasetAnnotationExecutor.java      |  27 +-
 .../jobs/executors/LoadFileExecutor.java           |  91 ++--
 .../jobs/executors/MoleculeExecutor.java           |  26 +-
 .../jobs/executors/OntologyExecutor.java           | 118 +++--
 .../jobs/executors/OrthologyExecutor.java          |  30 +-
 .../jobs/executors/ParalogyExecutor.java           |  30 +-
 .../PhenotypeAnnotationExecutor.java               |  38 +-
 .../executors/ResourceDescriptorExecutor.java      |  31 +-
 .../SequenceTargetingReagentExecutor.java          |  40 +-
 .../jobs/executors/VariantExecutor.java            |  24 +-
 .../AlleleGeneAssociationExecutor.java             |  24 +-
 ...tructGenomicEntityAssociationExecutor.java      |  24 +-
 .../jobs/processors/BulkLoadProcessor.java         |  98 ++--
 .../jobs/processors/StartLoadProcessor.java        |  35 +-
 .../curation_api/jobs/util/SlackNotifier.java      |  26 +-
 .../model/entities/bulkloads/BulkLoad.java         |  14 +-
 .../entities/bulkloads/BulkLoadFile.java           |  35 +-
 .../bulkloads/BulkLoadFileHistory.java             |  19 +-
 .../curation_api/services/Gff3Service.java         |  79 ++--
 .../loads/BulkLoadFileHistoryService.java          |   2 +-
 .../services/loads/BulkLoadFileService.java        |  19 +-
 .../validation/dto/Gff3DtoValidator.java           |   8 +-
 45 files changed, 1276 insertions(+), 839 deletions(-)
 rename src/main/java/org/alliancegenome/curation_api/jobs/events/{PendingBulkLoadFileJobEvent.java => PendingLoadJobEvent.java} (78%)
 rename src/main/java/org/alliancegenome/curation_api/jobs/events/{StartedBulkLoadFileJobEvent.java => StartedLoadJobEvent.java} (78%)
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java

diff --git a/src/main/cliapp/src/service/DataLoadService.js b/src/main/cliapp/src/service/DataLoadService.js
index 28b2eaa66..518f64a5f 100644
--- a/src/main/cliapp/src/service/DataLoadService.js
+++ b/src/main/cliapp/src/service/DataLoadService.js
@@ -81,6 +81,9 @@ export class DataLoadService extends BaseAuthService {
     const bulkLoadTypes = {
        BulkFMSLoad: [
            'GFF',
+            'GFF_EXON',
+            'GFF_CDS',
+            'GFF_TRANSCRIPT',
            'HTPDATASET',
            'INTERACTION-GEN',
            'INTERACTION-MOL',
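The surviving Gff3Executor hunk at the top of this patch introduces a counter-based throttle: instead of merging the load-history row after every record, progress is flushed only every updateThreshhold records (500 in the patch, per its comment roughly one update every five seconds at the observed rate). A minimal stand-alone sketch of that idea, with names of this sketch's own choosing:

class ProgressFlusher {
    private final int updateThreshold;
    private int counter = 0;

    ProgressFlusher(int updateThreshold) {
        this.updateThreshold = updateThreshold;
    }

    void recordProcessed(Runnable flush) {
        // Flush every N records instead of on every record, e.g. merging the history row.
        if (++counter % updateThreshold == 0) {
            flush.run();
        }
    }
}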
diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java
index 5585ab971..412afee93 100644
--- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java
+++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java
@@ -5,7 +5,7 @@
 import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController;
 import org.alliancegenome.curation_api.dao.CodingSequenceDAO;
 import org.alliancegenome.curation_api.interfaces.crud.CodingSequenceCrudInterface;
-import org.alliancegenome.curation_api.jobs.executors.Gff3Executor;
+import org.alliancegenome.curation_api.jobs.executors.Gff3CDSExecutor;
 import org.alliancegenome.curation_api.model.entities.CodingSequence;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO;
 import org.alliancegenome.curation_api.response.APIResponse;
@@ -22,7 +22,7 @@ public class CodingSequenceCrudController extends BaseEntityCrudController
 gffData) {
-    return gff3Executor.runLoadApi(dataProvider, assembly, gffData);
+    return gff3CDSExecutor.runLoadApi(dataProvider, assembly, gffData);
 }
 
 @Override
diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java
index bf4c105dd..f34218e37 100644
--- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java
+++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java
@@ -5,7 +5,7 @@
 import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController;
 import org.alliancegenome.curation_api.dao.ExonDAO;
 import org.alliancegenome.curation_api.interfaces.crud.ExonCrudInterface;
-import org.alliancegenome.curation_api.jobs.executors.Gff3Executor;
+import org.alliancegenome.curation_api.jobs.executors.Gff3ExonExecutor;
 import org.alliancegenome.curation_api.model.entities.Exon;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO;
 import org.alliancegenome.curation_api.response.APIResponse;
@@ -22,7 +22,7 @@ public class ExonCrudController extends BaseEntityCrudController
 gffData) {
-    return gff3Executor.runLoadApi(dataProvider, assembly, gffData);
+    return gff3ExonExecutor.runLoadApi(dataProvider, assembly, gffData);
 }
 
 @Override
diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java
index a2b5e6c2a..0e2f19afd 100644
--- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java
+++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java
@@ -5,7 +5,7 @@
 import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController;
 import org.alliancegenome.curation_api.dao.TranscriptDAO;
 import org.alliancegenome.curation_api.interfaces.crud.TranscriptCrudInterface;
-import org.alliancegenome.curation_api.jobs.executors.Gff3Executor;
+import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptExecutor;
 import org.alliancegenome.curation_api.model.entities.Transcript;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO;
 import org.alliancegenome.curation_api.response.APIResponse;
@@ -22,7 +22,7 @@ public class TranscriptCrudController extends BaseEntityCrudController
 gffData) {
-    return gff3Executor.runLoadApi(dataProvider, assembly, gffData);
+    return gff3TranscriptExecutor.runLoadApi(dataProvider, assembly, gffData);
 }
 
 @Override
diff --git a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java
index 2c3ae805d..3996ad186 100644
--- a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java
+++ b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java
@@ -18,13 +18,27 @@ public enum BackendBulkLoadType {
 ALLELE_ASSOCIATION("json"),
 CONSTRUCT_ASSOCIATION("json"),
 VARIANT("json"),
-GFF("gff"),
+
+// GFF all from the same file but split out
+GFF("gff"), // For Database entries
+
+GFF_EXON("gff"),
+GFF_CDS("gff"),
+GFF_TRANSCRIPT("gff"),
+GFF_EXON_LOCATION("gff"),
+GFF_CDS_LOCATION("gff"),
+GFF_TRANSCRIPT_LOCATION("gff"),
+GFF_TRANSCRIPT_GENE("gff"),
+GFF_TRANSCRIPT_EXON("gff"),
+GFF_TRANSCRIPT_CDS("gff"),
+
 INTERACTION_MOL("tsv"),
 INTERACTION_GEN("tsv"),
 PARALOGY("json"),
 SEQUENCE_TARGETING_REAGENT("json"),
 EXPRESSION("json"),
-HTPDATASET("json");
+HTPDATASET("json"),
+;
 
 public String fileExtension;
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java b/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java
index b5ac99ad3..ebfe5685d 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java
@@ -7,15 +7,17 @@
 import org.alliancegenome.curation_api.dao.loads.BulkLoadDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkLoadFileDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkLoadFileExceptionDAO;
+import org.alliancegenome.curation_api.dao.loads.BulkLoadFileHistoryDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkLoadGroupDAO;
 import org.alliancegenome.curation_api.enums.JobStatus;
-import org.alliancegenome.curation_api.jobs.events.PendingBulkLoadFileJobEvent;
 import org.alliancegenome.curation_api.jobs.events.PendingBulkLoadJobEvent;
+import org.alliancegenome.curation_api.jobs.events.PendingLoadJobEvent;
 import org.alliancegenome.curation_api.jobs.events.StartedBulkLoadJobEvent;
+import org.alliancegenome.curation_api.jobs.events.StartedLoadJobEvent;
 import org.alliancegenome.curation_api.jobs.util.SlackNotifier;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoad;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadGroup;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkScheduledLoad;
 import org.alliancegenome.curation_api.response.SearchResponse;
@@ -46,9 +48,11 @@ public class JobScheduler {
 
 @Inject Event<StartedBulkLoadJobEvent> startedJobEvents;
-@Inject Event<StartedBulkLoadFileJobEvent> startedFileJobEvents;
+@Inject Event<StartedLoadJobEvent> startedFileJobEvents;
+
+@Inject BulkLoadFileDAO bulkLoadFileDAO;
+@Inject BulkLoadFileHistoryDAO bulkLoadFileHistoryDAO;
 @Inject BulkLoadGroupDAO groupDAO;
 @Inject BulkLoadDAO bulkLoadDAO;
 @Inject BulkLoadFileExceptionDAO bulkLoadFileExceptionDAO;
@@ -70,21 +74,23 @@ public void init() {
 if (g.getLoads().size() > 0) {
     for (BulkLoad b : g.getLoads()) {
        boolean isFirst = true;
-        for (BulkLoadFile bf : b.getLoadFiles()) {
-            if (bf.getBulkloadStatus() == null || bf.getBulkloadStatus().isRunning() || bf.getBulkloadStatus().isStarted() || bf.getLocalFilePath() != null) {
-                if (bf.getLocalFilePath() != null) {
-                    File file = new File(bf.getLocalFilePath());
+        for (BulkLoadFileHistory bfh : b.getHistory()) {
+            BulkLoadFile bulkLoadFile = bfh.getBulkLoadFile();
+            if (bfh.getBulkloadStatus() == null || bfh.getBulkloadStatus().isRunning() || bfh.getBulkloadStatus().isStarted() || bulkLoadFile.getLocalFilePath() != null) {
+                if (bulkLoadFile.getLocalFilePath() != null) {
+                    File file = new File(bulkLoadFile.getLocalFilePath());
                    if (file.exists()) {
                        file.delete();
                    }
                }
-                bf.setLocalFilePath(null);
-                bf.setErrorMessage("Failed due to server start up: Process never finished before the server restarted");
-                bf.setBulkloadStatus(JobStatus.FAILED);
+                bulkLoadFile.setLocalFilePath(null);
+                bfh.setErrorMessage("Failed due to server start up: Process never finished before the server restarted");
+                bfh.setBulkloadStatus(JobStatus.FAILED);
                if (isFirst) {
-                    slackNotifier.slackalert(bf); // Only notify on the first failed file not all the failed files under a load
+                    slackNotifier.slackalert(bfh); // Only notify on the first failed file not all the failed files under a load
                }
-                bulkLoadFileDAO.merge(bf);
+                bulkLoadFileDAO.merge(bulkLoadFile);
+                bulkLoadFileHistoryDAO.merge(bfh);
            }
            isFirst = false;
        }
@@ -168,15 +174,15 @@ public void pendingJobs(@Observes PendingBulkLoadJobEvent event) {
    }
 }
 
-public void pendingFileJobs(@Observes PendingBulkLoadFileJobEvent event) {
+public void pendingFileJobs(@Observes PendingLoadJobEvent event) {
    // Log.info("pendingFileJobs: " + event.getId());
-    BulkLoadFile fileLoad = bulkLoadFileDAO.find(event.getId());
-    if (fileLoad != null) {
-        if (fileLoad.getBulkloadStatus().isPending()) {
-            fileLoad.setBulkloadStatus(fileLoad.getBulkloadStatus().getNextStatus());
-            bulkLoadFileDAO.merge(fileLoad);
+    BulkLoadFileHistory fileLoadHistory = bulkLoadFileHistoryDAO.find(event.getId());
+    if (fileLoadHistory != null) {
+        if (fileLoadHistory.getBulkloadStatus().isPending()) {
+            fileLoadHistory.setBulkloadStatus(fileLoadHistory.getBulkloadStatus().getNextStatus());
+            bulkLoadFileHistoryDAO.merge(fileLoadHistory);
            // Log.info("Firing Start File Job Event: " + fileLoad.getId());
-            startedFileJobEvents.fire(new StartedBulkLoadFileJobEvent(fileLoad.getId()));
+            startedFileJobEvents.fire(new StartedLoadJobEvent(fileLoadHistory.getId()));
        }
    }
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/events/PendingBulkLoadFileJobEvent.java b/src/main/java/org/alliancegenome/curation_api/jobs/events/PendingLoadJobEvent.java
similarity index 78%
rename from src/main/java/org/alliancegenome/curation_api/jobs/events/PendingBulkLoadFileJobEvent.java
rename to src/main/java/org/alliancegenome/curation_api/jobs/events/PendingLoadJobEvent.java
index e4765286f..9e789d4f8 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/events/PendingBulkLoadFileJobEvent.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/events/PendingLoadJobEvent.java
@@ -4,6 +4,6 @@
 import lombok.Data;
 
 @Data
 @AllArgsConstructor
-public class PendingBulkLoadFileJobEvent {
+public class PendingLoadJobEvent {
 private Long id;
 }
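The scheduler hand-off above is plain CDI eventing: the renamed event classes carry only a Long id, which is fired through Event<T> and picked up by an @Observes method that looks the history row back up and advances its status. A toy version of that round trip, under this sketch's own names:

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.event.Event;
import jakarta.enterprise.event.Observes;
import jakarta.inject.Inject;

record LoadJobEvent(Long id) {} // stand-in for PendingLoadJobEvent / StartedLoadJobEvent

@ApplicationScoped
class LoadJobProducer {
    @Inject Event<LoadJobEvent> events;

    void trigger(Long historyId) {
        events.fire(new LoadJobEvent(historyId)); // synchronous delivery by default in CDI
    }
}

@ApplicationScoped
class LoadJobConsumer {
    void onJob(@Observes LoadJobEvent event) {
        // look up the BulkLoadFileHistory by event.id() and advance its status
    }
}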
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/events/StartedBulkLoadFileJobEvent.java b/src/main/java/org/alliancegenome/curation_api/jobs/events/StartedLoadJobEvent.java
similarity index 78%
rename from src/main/java/org/alliancegenome/curation_api/jobs/events/StartedBulkLoadFileJobEvent.java
rename to src/main/java/org/alliancegenome/curation_api/jobs/events/StartedLoadJobEvent.java
index 97ecbde78..ecd5f5abf 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/events/StartedBulkLoadFileJobEvent.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/events/StartedLoadJobEvent.java
@@ -4,6 +4,6 @@
 import lombok.Data;
 
 @Data
 @AllArgsConstructor
-public class StartedBulkLoadFileJobEvent {
+public class StartedLoadJobEvent {
 private Long id;
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java
index 26a9dfaff..d0ee8f677 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java
@@ -6,7 +6,6 @@
 import org.alliancegenome.curation_api.dao.AGMDiseaseAnnotationDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.AGMDiseaseAnnotationDTO;
@@ -26,13 +25,13 @@ public class AgmDiseaseAnnotationExecutor extends LoadFileExecutor {
 @Inject DiseaseAnnotationService diseaseAnnotationService;
 @Inject AGMDiseaseAnnotationService agmDiseaseAnnotationService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     BackendBulkDataProvider dataProvider = manual.getDataProvider();
     log.info("Running with dataProvider: " + dataProvider.name());
 
-    IngestDTO<AGMDiseaseAnnotationDTO> ingestDto = readIngestFile(bulkLoadFile, AGMDiseaseAnnotationDTO.class);
+    IngestDTO<AGMDiseaseAnnotationDTO> ingestDto = readIngestFile(bulkLoadFileHistory, AGMDiseaseAnnotationDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -49,17 +48,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        annotationIdsBefore.removeIf(Objects::isNull);
     }
 
-    bulkLoadFile.setRecordCount(annotations.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(annotations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(annotations.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(agmDiseaseAnnotationService, history, dataProvider, annotations, annotationIdsLoaded);
+    bulkLoadFileHistory.setTotalRecords((long) annotations.size());
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(agmDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(diseaseAnnotationService, history, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "AGM disease annotation", bulkLoadFile.getMd5Sum());
+        runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "AGM disease annotation");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
 }
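This executor shows the shape every executor in the patch converges on: the BulkLoadFileHistory is created upstream and threaded through, the expected total is written to it up front, and load, cleanup, and finish all act on the same row. Written out once as a self-contained template, with helper names taken from the diffs (readIngestFile, runLoad, runCleanup, updateHistory, finalSaveHistory) but types reduced to a minimal sketch:

import java.util.ArrayList;
import java.util.List;

abstract class LoadTemplate<T> {
    interface History {
        void setTotalRecords(long total);
        void finishLoad();
    }

    void execLoad(History history, boolean cleanUp) {
        List<T> records = read(history);         // parse the downloaded file into DTOs
        history.setTotalRecords(records.size());
        updateHistory(history);                   // persist the expected total up front

        List<Long> loadedIds = new ArrayList<>();
        boolean success = runLoad(history, records, loadedIds);
        if (success && cleanUp) {
            runCleanup(history, loadedIds);       // deprecate rows not seen in this load
        }
        history.finishLoad();
        finalSaveHistory(history);
    }

    abstract List<T> read(History h);
    abstract boolean runLoad(History h, List<T> records, List<Long> loadedIds);
    abstract void runCleanup(History h, List<Long> loadedIds);
    abstract void updateHistory(History h);
    abstract void finalSaveHistory(History h);
}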
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmExecutor.java
index a46ef21e6..8ec7922a9 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmExecutor.java
@@ -5,7 +5,6 @@
 import org.alliancegenome.curation_api.dao.AffectedGenomicModelDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.AffectedGenomicModelDTO;
@@ -26,12 +25,12 @@ public class AgmExecutor extends LoadFileExecutor {
 @Inject NcbiTaxonTermService ncbiTaxonTermService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     Log.info("Running with: " + manual.getDataProvider().name());
 
-    IngestDTO<AffectedGenomicModelDTO> ingestDto = readIngestFile(bulkLoadFile, AffectedGenomicModelDTO.class);
+    IngestDTO<AffectedGenomicModelDTO> ingestDto = readIngestFile(bulkLoadFileHistory, AffectedGenomicModelDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -50,17 +49,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        Log.debug("runLoad: Before: total " + agmIdsBefore.size());
     }
 
-    bulkLoadFile.setRecordCount(agms.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(agms.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(agms.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(affectedGenomicModelService, history, dataProvider, agms, agmIdsLoaded);
+    bulkLoadFileHistory.setTotalRecords((long) agms.size());
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(affectedGenomicModelService, bulkLoadFileHistory, dataProvider, agms, agmIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(affectedGenomicModelService, history, dataProvider.name(), agmIdsBefore, agmIdsLoaded, "AGM", bulkLoadFile.getMd5Sum());
+        runCleanup(affectedGenomicModelService, bulkLoadFileHistory, dataProvider.name(), agmIdsBefore, agmIdsLoaded, "AGM");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java
index 22b3134d4..a4faa1dc2 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java
@@ -6,7 +6,6 @@
 import org.alliancegenome.curation_api.dao.AlleleDiseaseAnnotationDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.AlleleDiseaseAnnotationDTO;
@@ -26,13 +25,13 @@ public class AlleleDiseaseAnnotationExecutor extends LoadFileExecutor {
 @Inject AlleleDiseaseAnnotationService alleleDiseaseAnnotationService;
 @Inject DiseaseAnnotationService diseaseAnnotationService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     BackendBulkDataProvider dataProvider = manual.getDataProvider();
     log.info("Running with dataProvider: " + dataProvider.name());
 
-    IngestDTO<AlleleDiseaseAnnotationDTO> ingestDto = readIngestFile(bulkLoadFile, AlleleDiseaseAnnotationDTO.class);
+    IngestDTO<AlleleDiseaseAnnotationDTO> ingestDto = readIngestFile(bulkLoadFileHistory, AlleleDiseaseAnnotationDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -49,17 +48,19 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        annotationIdsBefore.removeIf(Objects::isNull);
     }
 
-    bulkLoadFile.setRecordCount(annotations.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(annotations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(annotations.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(alleleDiseaseAnnotationService, history, dataProvider, annotations, annotationIdsLoaded);
+    bulkLoadFileHistory.setTotalRecords((long) annotations.size());
+
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(alleleDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(diseaseAnnotationService, history, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "allele disease annotation", bulkLoadFile.getMd5Sum());
+        runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "allele disease annotation");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleExecutor.java
index 7b1293226..263e6cc48 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleExecutor.java
@@ -5,7 +5,6 @@
 import org.alliancegenome.curation_api.dao.AlleleDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.AlleleDTO;
@@ -22,12 +21,12 @@ public class AlleleExecutor extends LoadFileExecutor {
 @Inject AlleleDAO alleleDAO;
 @Inject AlleleService alleleService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     Log.info("Running with: " + manual.getDataProvider().name());
 
-    IngestDTO<AlleleDTO> ingestDto = readIngestFile(bulkLoadFile, AlleleDTO.class);
+    IngestDTO<AlleleDTO> ingestDto = readIngestFile(bulkLoadFileHistory, AlleleDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -46,17 +45,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        Log.debug("runLoad: Before: total " + alleleIdsBefore.size());
     }
 
-    bulkLoadFile.setRecordCount(alleles.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(alleles.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(alleles.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(alleleService, history, dataProvider, alleles, alleleIdsLoaded);
+    bulkLoadFileHistory.setTotalRecords((long) alleles.size());
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(alleleService, bulkLoadFileHistory, dataProvider, alleles, alleleIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(alleleService, history, dataProvider.name(), alleleIdsBefore, alleleIdsLoaded, "allele", bulkLoadFile.getMd5Sum());
+        runCleanup(alleleService, bulkLoadFileHistory, dataProvider.name(), alleleIdsBefore, alleleIdsLoaded, "allele");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java
index c2b03efa7..2da5e806a 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java
@@ -19,7 +19,7 @@
 import org.alliancegenome.curation_api.enums.BackendBulkLoadType;
 import org.alliancegenome.curation_api.jobs.executors.associations.alleleAssociations.AlleleGeneAssociationExecutor;
 import org.alliancegenome.curation_api.jobs.executors.associations.constructAssociations.ConstructGenomicEntityAssociationExecutor;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 
 import jakarta.enterprise.context.ApplicationScoped;
 import jakarta.inject.Inject;
@@ -51,79 +51,101 @@ public class BulkLoadJobExecutor {
 @Inject ParalogyExecutor paralogyExecutor;
 @Inject GeneExpressionExecutor geneExpressionExecutor;
 @Inject SequenceTargetingReagentExecutor sqtrExecutor;
-@Inject Gff3Executor gff3Executor;
+
+@Inject Gff3ExonExecutor gff3ExonExecutor;
+@Inject Gff3CDSExecutor gff3CDSExecutor;
+@Inject Gff3TranscriptExecutor gff3TranscriptExecutor;
+
 @Inject HTPExpressionDatasetAnnotationExecutor htpExpressionDatasetAnnotationExecutor;
 
-public void process(BulkLoadFile bulkLoadFile, Boolean cleanUp) throws Exception {
+public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) throws Exception {
 
-    BackendBulkLoadType loadType = bulkLoadFile.getBulkLoad().getBackendBulkLoadType();
+    BackendBulkLoadType loadType = bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType();
 
     List<BackendBulkLoadType> ingestTypes = List.of(AGM_DISEASE_ANNOTATION, ALLELE_DISEASE_ANNOTATION, GENE_DISEASE_ANNOTATION, DISEASE_ANNOTATION, AGM, ALLELE, GENE, VARIANT, CONSTRUCT, FULL_INGEST, ALLELE_ASSOCIATION, CONSTRUCT_ASSOCIATION);
 
     if (ingestTypes.contains(loadType)) {
 
-        bulkLoadFile.setRecordCount(0);
-        bulkLoadFileDAO.merge(bulkLoadFile);
+        bulkLoadFileHistory.getBulkLoadFile().setRecordCount(0);
+        bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
        if (loadType == AGM || loadType == FULL_INGEST) {
-            agmExecutor.execLoad(bulkLoadFile, cleanUp);
+            agmExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == ALLELE || loadType == FULL_INGEST) {
-            alleleExecutor.execLoad(bulkLoadFile, cleanUp);
+            alleleExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == GENE || loadType == FULL_INGEST) {
-            geneExecutor.execLoad(bulkLoadFile, cleanUp);
+            geneExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == CONSTRUCT || loadType == FULL_INGEST) {
-            constructExecutor.execLoad(bulkLoadFile, cleanUp);
+            constructExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == VARIANT || loadType == FULL_INGEST) {
-            variantExecutor.execLoad(bulkLoadFile, cleanUp);
+            variantExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == ALLELE_DISEASE_ANNOTATION || loadType == DISEASE_ANNOTATION || loadType == FULL_INGEST) {
-            alleleDiseaseAnnotationExecutor.execLoad(bulkLoadFile, cleanUp);
+            alleleDiseaseAnnotationExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == AGM_DISEASE_ANNOTATION || loadType == DISEASE_ANNOTATION || loadType == FULL_INGEST) {
-            agmDiseaseAnnotationExecutor.execLoad(bulkLoadFile, cleanUp);
+            agmDiseaseAnnotationExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == GENE_DISEASE_ANNOTATION || loadType == DISEASE_ANNOTATION || loadType == FULL_INGEST) {
-            geneDiseaseAnnotationExecutor.execLoad(bulkLoadFile, cleanUp);
+            geneDiseaseAnnotationExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == ALLELE_ASSOCIATION || loadType == FULL_INGEST) {
-            alleleGeneAssociationExecutor.execLoad(bulkLoadFile, cleanUp);
+            alleleGeneAssociationExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
        if (loadType == CONSTRUCT_ASSOCIATION || loadType == FULL_INGEST) {
-            constructGenomicEntityAssociationExecutor.execLoad(bulkLoadFile, cleanUp);
+            constructGenomicEntityAssociationExecutor.execLoad(bulkLoadFileHistory, cleanUp);
        }
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.MOLECULE) {
-        moleculeExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.SEQUENCE_TARGETING_REAGENT) {
-        sqtrExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.INTERACTION_MOL) {
-        geneMolecularInteractionExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.INTERACTION_GEN) {
-        geneGeneticInteractionExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.PHENOTYPE) {
-        phenotypeAnnotationExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.ORTHOLOGY) {
-        orthologyExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.PARALOGY) {
-        paralogyExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.ONTOLOGY) {
-        ontologyExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.RESOURCE_DESCRIPTOR) {
-        resourceDescriptorExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.EXPRESSION) {
-        geneExpressionExecutor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF) {
-        gff3Executor.execLoad(bulkLoadFile);
-    } else if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASET) {
-        htpExpressionDatasetAnnotationExecutor.execLoad(bulkLoadFile);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.MOLECULE) {
+        moleculeExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.SEQUENCE_TARGETING_REAGENT) {
+        sqtrExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.INTERACTION_MOL) {
+        geneMolecularInteractionExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.INTERACTION_GEN) {
+        geneGeneticInteractionExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.PHENOTYPE) {
+        phenotypeAnnotationExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.ORTHOLOGY) {
+        orthologyExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.PARALOGY) {
+        paralogyExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.ONTOLOGY) {
+        ontologyExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.RESOURCE_DESCRIPTOR) {
+        resourceDescriptorExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.EXPRESSION) {
+        geneExpressionExecutor.execLoad(bulkLoadFileHistory);
+
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_EXON) {
+        gff3ExonExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_CDS) {
+        gff3CDSExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_TRANSCRIPT) {
+        gff3TranscriptExecutor.execLoad(bulkLoadFileHistory);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_EXON_LOCATION) {
+        //gff3Executor.execLoad(bulkLoadFile);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_CDS_LOCATION) {
+        //gff3Executor.execLoad(bulkLoadFile);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_TRANSCRIPT_LOCATION) {
+        //gff3Executor.execLoad(bulkLoadFile);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_TRANSCRIPT_GENE) {
+        //gff3Executor.execLoad(bulkLoadFile);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_TRANSCRIPT_EXON) {
+        //gff3Executor.execLoad(bulkLoadFile);
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_TRANSCRIPT_CDS) {
+        //gff3Executor.execLoad(bulkLoadFile);
+
+    } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASET) {
+        htpExpressionDatasetAnnotationExecutor.execLoad(bulkLoadFileHistory);
     } else {
-        log.info("Load: " + bulkLoadFile.getBulkLoad().getName() + " not implemented");
-        throw new Exception("Load: " + bulkLoadFile.getBulkLoad().getName() + " not implemented");
+        log.info("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented");
+        throw new Exception("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented");
     }
-    log.info("Process Finished for: " + bulkLoadFile.getBulkLoad().getName());
+    log.info("Process Finished for: " + bulkLoadFileHistory.getBulkLoad().getName());
 }
 }
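The dispatch above branches on nothing but BackendBulkLoadType, so the growing else-if chain could be tabulated. A self-contained sketch of a map-based alternative, not part of the patch; the enum subset and Consumer<Long> stand in for the real load types and executors:

import java.util.Map;
import java.util.function.Consumer;

class LoadDispatcher {
    enum LoadType { GFF_EXON, GFF_CDS, GFF_TRANSCRIPT, HTPDATASET } // illustrative subset

    private final Map<LoadType, Consumer<Long>> executors = Map.of(
        LoadType.GFF_EXON, id -> System.out.println("exon load " + id),
        LoadType.GFF_CDS, id -> System.out.println("cds load " + id),
        LoadType.GFF_TRANSCRIPT, id -> System.out.println("transcript load " + id),
        LoadType.HTPDATASET, id -> System.out.println("htp dataset load " + id));

    void dispatch(LoadType type, Long historyId) {
        Consumer<Long> executor = executors.get(type);
        if (executor == null) {
            // Mirrors the else branch above: unknown types fail loudly
            throw new IllegalArgumentException("Load type " + type + " not implemented");
        }
        executor.accept(historyId);
    }
}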
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ConstructExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ConstructExecutor.java
index 22096d26d..676efb083 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ConstructExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ConstructExecutor.java
@@ -5,7 +5,6 @@
 import org.alliancegenome.curation_api.dao.ConstructDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.ConstructDTO;
@@ -26,12 +25,12 @@ public class ConstructExecutor extends LoadFileExecutor {
 @Inject NcbiTaxonTermService ncbiTaxonTermService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     Log.info("Running with: " + manual.getDataProvider().name());
 
-    IngestDTO<ConstructDTO> ingestDto = readIngestFile(bulkLoadFile, ConstructDTO.class);
+    IngestDTO<ConstructDTO> ingestDto = readIngestFile(bulkLoadFileHistory, ConstructDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -50,17 +49,19 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        Log.debug("runLoad: Before: total " + constructIdsBefore.size());
     }
 
-    bulkLoadFile.setRecordCount(constructs.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(constructs.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(constructs.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(constructService, history, dataProvider, constructs, constructIdsLoaded);
+    bulkLoadFileHistory.setTotalDeleteRecords((long) constructs.size());
+
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(constructService, bulkLoadFileHistory, dataProvider, constructs, constructIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(constructService, history, dataProvider.name(), constructIdsBefore, constructIdsLoaded, "construct", bulkLoadFile.getMd5Sum());
+        runCleanup(constructService, bulkLoadFileHistory, dataProvider.name(), constructIdsBefore, constructIdsLoaded, "construct");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java
index aeb8367d0..5e3579921 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java
@@ -6,7 +6,6 @@
 import org.alliancegenome.curation_api.dao.GeneDiseaseAnnotationDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.GeneDiseaseAnnotationDTO;
@@ -26,13 +25,13 @@ public class GeneDiseaseAnnotationExecutor extends LoadFileExecutor {
 @Inject GeneDiseaseAnnotationService geneDiseaseAnnotationService;
 @Inject DiseaseAnnotationService diseaseAnnotationService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     BackendBulkDataProvider dataProvider = manual.getDataProvider();
     log.info("Running with dataProvider: " + dataProvider.name());
 
-    IngestDTO<GeneDiseaseAnnotationDTO> ingestDto = readIngestFile(bulkLoadFile, GeneDiseaseAnnotationDTO.class);
+    IngestDTO<GeneDiseaseAnnotationDTO> ingestDto = readIngestFile(bulkLoadFileHistory, GeneDiseaseAnnotationDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -49,17 +48,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        annotationIdsBefore.removeIf(Objects::isNull);
     }
 
-    bulkLoadFile.setRecordCount(annotations.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(annotations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(annotations.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(geneDiseaseAnnotationService, history, dataProvider, annotations, annotationIdsLoaded);
+    bulkLoadFileHistory.setTotalRecords((long) annotations.size());
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(geneDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(diseaseAnnotationService, history, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene disease annotation", bulkLoadFile.getMd5Sum());
+        runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene disease annotation");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExecutor.java
index a7b0cec73..fb140f31e 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExecutor.java
@@ -5,7 +5,6 @@
 import org.alliancegenome.curation_api.dao.GeneDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.GeneDTO;
@@ -27,13 +26,13 @@ public class GeneExecutor extends LoadFileExecutor {
 @Inject NcbiTaxonTermService ncbiTaxonTermService;
 
-public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
 
-    BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+    BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
     BackendBulkDataProvider dataProvider = manual.getDataProvider();
     log.info("Running with dataProvider : " + dataProvider.name());
 
-    IngestDTO<GeneDTO> ingestDto = readIngestFile(bulkLoadFile, GeneDTO.class);
+    IngestDTO<GeneDTO> ingestDto = readIngestFile(bulkLoadFileHistory, GeneDTO.class);
     if (ingestDto == null) {
        return;
     }
@@ -50,17 +49,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
        log.debug("runLoad: Before: total " + geneIdsBefore.size());
     }
 
-    bulkLoadFile.setRecordCount(genes.size() + bulkLoadFile.getRecordCount());
-    bulkLoadFileDAO.merge(bulkLoadFile);
+    bulkLoadFileHistory.getBulkLoadFile().setRecordCount(genes.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+    bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-    BulkLoadFileHistory history = new BulkLoadFileHistory(genes.size());
-    createHistory(history, bulkLoadFile);
-    boolean success = runLoad(geneService, history, dataProvider, genes, geneIdsLoaded);
+    bulkLoadFileHistory.setTotalRecords((long) genes.size());
+    updateHistory(bulkLoadFileHistory);
+
+    boolean success = runLoad(geneService, bulkLoadFileHistory, dataProvider, genes, geneIdsLoaded);
     if (success && cleanUp) {
-        runCleanup(geneService, history, dataProvider.name(), geneIdsBefore, geneIdsLoaded, "gene", bulkLoadFile.getMd5Sum());
+        runCleanup(geneService, bulkLoadFileHistory, dataProvider.name(), geneIdsBefore, geneIdsLoaded, "gene");
     }
-    history.finishLoad();
-    finalSaveHistory(history);
+    bulkLoadFileHistory.finishLoad();
+    finalSaveHistory(bulkLoadFileHistory);
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java
index 8f9d81f88..f477ed15a 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java
@@ -1,55 +1,57 @@
 package org.alliancegenome.curation_api.jobs.executors;
 
-import jakarta.enterprise.context.ApplicationScoped;
-import jakarta.inject.Inject;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
 import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
 import org.alliancegenome.curation_api.model.entities.GeneExpressionAnnotation;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionIngestFmsDTO;
 import org.alliancegenome.curation_api.services.GeneExpressionAnnotationService;
 import org.apache.commons.lang3.StringUtils;
 
-import java.io.FileInputStream;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.zip.GZIPInputStream;
+import jakarta.enterprise.context.ApplicationScoped;
+import jakarta.inject.Inject;
 
 @ApplicationScoped
 public class GeneExpressionExecutor extends LoadFileExecutor {
 @Inject GeneExpressionAnnotationService geneExpressionAnnotationService;
 
-public void execLoad(BulkLoadFile bulkLoadFile) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
     try {
-        BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFile.getBulkLoad();
+        BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
        BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType());
-        GeneExpressionIngestFmsDTO geneExpressionIngestFmsDTO = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), GeneExpressionIngestFmsDTO.class);
-        bulkLoadFile.setRecordCount(geneExpressionIngestFmsDTO.getData().size());
+        GeneExpressionIngestFmsDTO geneExpressionIngestFmsDTO = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), GeneExpressionIngestFmsDTO.class);
+        bulkLoadFileHistory.getBulkLoadFile().setRecordCount(geneExpressionIngestFmsDTO.getData().size());
        AGRCurationSchemaVersion version = GeneExpressionAnnotation.class.getAnnotation(AGRCurationSchemaVersion.class);
-        bulkLoadFile.setLinkMLSchemaVersion(version.max());
+        bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max());
        if (geneExpressionIngestFmsDTO.getMetaData() != null && StringUtils.isNotBlank(geneExpressionIngestFmsDTO.getMetaData().getRelease())) {
-            bulkLoadFile.setAllianceMemberReleaseVersion(geneExpressionIngestFmsDTO.getMetaData().getRelease());
+            bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(geneExpressionIngestFmsDTO.getMetaData().getRelease());
        }
+        bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
 
-        BulkLoadFileHistory history = new BulkLoadFileHistory(geneExpressionIngestFmsDTO.getData().size());
-        createHistory(history, bulkLoadFile);
+        bulkLoadFileHistory.setTotalRecords((long) geneExpressionIngestFmsDTO.getData().size());
+        updateHistory(bulkLoadFileHistory);
+
        List<Long> annotationIdsLoaded = new ArrayList<>();
        List<Long> annotationIdsBefore = geneExpressionAnnotationService.getAnnotationIdsByDataProvider(dataProvider);
-        boolean success = runLoad(geneExpressionAnnotationService, history, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded);
+        boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded);
        if (success) {
-            runCleanup(geneExpressionAnnotationService, history, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene expression annotation", bulkLoadFile.getMd5Sum());
+            runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene expression annotation");
        }
-        history.finishLoad();
-        finalSaveHistory(history);
+        bulkLoadFileHistory.finishLoad();
+        finalSaveHistory(bulkLoadFileHistory);
     } catch (Exception e) {
-        failLoad(bulkLoadFile, e);
+        failLoad(bulkLoadFileHistory, e);
        e.printStackTrace();
     }
 }
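The read pattern in this executor, streaming a gzipped JSON file straight into a DTO with Jackson, is worth seeing in isolation. A minimal runnable sketch; the file name and Payload class are placeholders (the real code binds GeneExpressionIngestFmsDTO from the downloaded load file):

import java.io.FileInputStream;
import java.util.List;
import java.util.zip.GZIPInputStream;

import com.fasterxml.jackson.databind.ObjectMapper;

public class GzipJsonRead {
    // Placeholder DTO for this sketch only.
    public static class Payload {
        public List<Object> data;
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        // Decompress on the fly; Jackson reads directly from the stream, no temp file needed.
        try (GZIPInputStream in = new GZIPInputStream(new FileInputStream("expression.json.gz"))) {
            Payload payload = mapper.readValue(in, Payload.class);
            System.out.println("records: " + payload.data.size());
        }
    }
}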
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java
index cba477bd8..936cb5bf8 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java
@@ -7,7 +7,6 @@
 import org.alliancegenome.curation_api.dao.GeneGeneticInteractionDAO;
 import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.PsiMiTabDTO;
 import org.alliancegenome.curation_api.services.GeneGeneticInteractionService;
@@ -28,28 +27,29 @@ public class GeneGeneticInteractionExecutor extends LoadFileExecutor {
 @Inject GeneGeneticInteractionService geneGeneticInteractionService;
 @Inject GeneInteractionService geneInteractionService;
 
-public void execLoad(BulkLoadFile bulkLoadFile) {
+public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
     try {
        CsvSchema psiMiTabSchema = CsvSchemaBuilder.psiMiTabSchema();
        CsvMapper csvMapper = new CsvMapper();
-        MappingIterator<PsiMiTabDTO> it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(PsiMiTabDTO.class).with(psiMiTabSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())));
+        MappingIterator<PsiMiTabDTO> it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(PsiMiTabDTO.class).with(psiMiTabSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())));
        List<PsiMiTabDTO> interactionData = it.readAll();
        List<Long> interactionIdsLoaded = new ArrayList<>();
        List<Long> interactionIdsBefore = geneGeneticInteractionDAO.findAllIds().getResults();
 
+        bulkLoadFileHistory.setTotalRecords((long) interactionData.size());
+        updateHistory(bulkLoadFileHistory);
 
-        BulkLoadFileHistory history = new BulkLoadFileHistory(interactionData.size());
-        createHistory(history, bulkLoadFile);
-        boolean success = runLoad(geneGeneticInteractionService, history, null, interactionData, interactionIdsLoaded, false);
+        boolean success = runLoad(geneGeneticInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false);
        if (success) {
-            runCleanup(geneInteractionService, history, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene genetic interaction", bulkLoadFile.getMd5Sum());
+            runCleanup(geneInteractionService, bulkLoadFileHistory, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene genetic interaction");
        }
-        history.finishLoad();
-        finalSaveHistory(history);
+        bulkLoadFileHistory.finishLoad();
+        finalSaveHistory(bulkLoadFileHistory);
     } catch (Exception e) {
+        failLoad(bulkLoadFileHistory, e);
        e.printStackTrace();
     }
 }
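Note the small behavioral fix threaded through both interaction executors: the catch block now calls failLoad(...), so a parse or load crash is recorded on the history row instead of only reaching stderr. A minimal rendering of that idea, with types and names of this sketch's own choosing:

class FailFastLoad {
    interface History {
        void setFailed(String message);
    }

    void execLoad(History history, Runnable load) {
        try {
            load.run();
        } catch (Exception e) {
            // Mirrors failLoad(bulkLoadFileHistory, e): the failure is persisted, not just printed
            history.setFailed(e.getMessage());
            e.printStackTrace();
        }
    }
}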
bulkLoadFileHistory.finishLoad(); + finalSaveHistory(bulkLoadFileHistory); } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java index acfbe9953..bbbe4bdb6 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java @@ -7,7 +7,6 @@ import org.alliancegenome.curation_api.dao.GeneMolecularInteractionDAO; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.PsiMiTabDTO; import org.alliancegenome.curation_api.services.GeneInteractionService; @@ -28,27 +27,29 @@ public class GeneMolecularInteractionExecutor extends LoadFileExecutor { @Inject GeneMolecularInteractionService geneMolecularInteractionService; @Inject GeneInteractionService geneInteractionService; - public void execLoad(BulkLoadFile bulkLoadFile) { + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { CsvSchema psiMiTabSchema = CsvSchemaBuilder.psiMiTabSchema(); CsvMapper csvMapper = new CsvMapper(); - MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(PsiMiTabDTO.class).with(psiMiTabSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath()))); + MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(PsiMiTabDTO.class).with(psiMiTabSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()))); List interactionData = it.readAll(); List interactionIdsLoaded = new ArrayList<>(); List interactionIdsBefore = geneMolecularInteractionDAO.findAllIds().getResults(); - BulkLoadFileHistory history = new BulkLoadFileHistory(interactionData.size()); - createHistory(history, bulkLoadFile); - boolean success = runLoad(geneMolecularInteractionService, history, null, interactionData, interactionIdsLoaded, false); + bulkLoadFileHistory.setTotalRecords((long) interactionData.size()); + updateHistory(bulkLoadFileHistory); + + boolean success = runLoad(geneMolecularInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false); if (success) { - runCleanup(geneInteractionService, history, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene molecular interaction", bulkLoadFile.getMd5Sum()); + runCleanup(geneInteractionService, bulkLoadFileHistory, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene molecular interaction"); } - history.finishLoad(); - finalSaveHistory(history); + bulkLoadFileHistory.finishLoad(); + finalSaveHistory(bulkLoadFileHistory); } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java new file mode 100644 index 000000000..d670630ed --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java @@ -0,0 +1,130 @@ 
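
[Review note (illustrative sketch, not part of the patch): the new GFF executors that follow all share one shape. execLoad() now receives the BulkLoadFileHistory, which carries both the load definition (getBulkLoad()) and the file being processed (getBulkLoadFile()), so executors no longer receive a BulkLoadFile directly or create their own history rows via createHistory(). A minimal skeleton of that pattern, using only the LoadFileExecutor helpers visible in this patch; WidgetService, WidgetIngestFmsDTO and the "widget" load type are hypothetical placeholders:]

    package org.alliancegenome.curation_api.jobs.executors;

    import java.io.FileInputStream;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.zip.GZIPInputStream;

    import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
    import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
    import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;

    import jakarta.enterprise.context.ApplicationScoped;
    import jakarta.inject.Inject;

    @ApplicationScoped
    public class WidgetExecutor extends LoadFileExecutor {

        @Inject WidgetService widgetService; // hypothetical upsert service

        public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
            try {
                BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
                BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType());

                // The file under load now hangs off the history row.
                WidgetIngestFmsDTO dto = mapper.readValue(
                    new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())),
                    WidgetIngestFmsDTO.class); // hypothetical DTO

                // Totals are recorded on the existing history instead of a fresh BulkLoadFileHistory.
                bulkLoadFileHistory.setTotalRecords((long) dto.getData().size());
                updateHistory(bulkLoadFileHistory);

                List<Long> idsBefore = widgetService.getIdsByDataProvider(dataProvider);
                List<Long> idsLoaded = new ArrayList<>();
                boolean success = runLoad(widgetService, bulkLoadFileHistory, dataProvider, dto.getData(), idsLoaded);
                if (success) {
                    // runCleanup() no longer takes an md5sum parameter; it reads it from the history's file.
                    runCleanup(widgetService, bulkLoadFileHistory, dataProvider.name(), idsBefore, idsLoaded, "widget");
                }
                bulkLoadFileHistory.finishLoad();
                finalSaveHistory(bulkLoadFileHistory);
            } catch (Exception e) {
                failLoad(bulkLoadFileHistory, e);
                e.printStackTrace();
            }
        }
    }
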
+package org.alliancegenome.curation_api.jobs.executors;
+
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
+import org.alliancegenome.curation_api.exceptions.ObjectUpdateException;
+import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData;
+import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
+import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO;
+import org.alliancegenome.curation_api.response.APIResponse;
+import org.alliancegenome.curation_api.response.LoadHistoryResponce;
+import org.alliancegenome.curation_api.services.CodingSequenceService;
+import org.alliancegenome.curation_api.services.Gff3Service;
+import org.alliancegenome.curation_api.util.ProcessDisplayHelper;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+
+import com.fasterxml.jackson.databind.MappingIterator;
+import com.fasterxml.jackson.dataformat.csv.CsvMapper;
+import com.fasterxml.jackson.dataformat.csv.CsvParser;
+import com.fasterxml.jackson.dataformat.csv.CsvSchema;
+
+import jakarta.enterprise.context.ApplicationScoped;
+import jakarta.inject.Inject;
+
+@ApplicationScoped
+public class Gff3CDSExecutor extends Gff3Executor {
+
+    @Inject Gff3Service gff3Service;
+    @Inject CodingSequenceService cdsService;
+
+    @Override
+    public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
+        try {
+
+            CsvSchema gff3Schema = CsvSchemaBuilder.gff3Schema();
+            CsvMapper csvMapper = new CsvMapper();
+            MappingIterator<Gff3DTO> it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(Gff3DTO.class).with(gff3Schema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())));
+            List<Gff3DTO> gffData = it.readAll();
+            List<String> gffHeaderData = new ArrayList<>();
+            for (Gff3DTO gffLine : gffData) {
+                if (gffLine.getSeqId().startsWith("#")) {
+                    gffHeaderData.add(gffLine.getSeqId());
+                } else {
+                    break;
+                }
+            }
+            gffData.subList(0, gffHeaderData.size()).clear();
+
+            BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
+            BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType());
+
+            List<ImmutablePair<Gff3DTO, Map<String, String>>> preProcessedGffData = preProcessGffData(gffData, dataProvider);
+
+            gffData.clear();
+
+            List<Long> idsAdded = new ArrayList<>();
+
+            bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size());
+            updateHistory(bulkLoadFileHistory);
+
+            boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider);
+
+            if (success) {
+                runCleanup(cdsService, bulkLoadFileHistory, dataProvider.name(), cdsService.getIdsByDataProvider(dataProvider), idsAdded, "GFF coding sequence");
+            }
+            bulkLoadFileHistory.finishLoad();
+            finalSaveHistory(bulkLoadFileHistory);
+
+        } catch (Exception e) {
+            failLoad(bulkLoadFileHistory, e);
+            e.printStackTrace();
+        }
+    }
+
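[Review note (illustrative sketch, not part of the patch): the entity loops in these executors persist progress in batches rather than per record; updateHistory() runs once every updateThreshhold iterations and once more after the loop, and the per-record calls were likewise removed from runLoad() and runCleanup() in LoadFileExecutor further down. A self-contained sketch of the idiom as it would sit in a LoadFileExecutor subclass; processOne() is a placeholder for the real per-record service call such as gff3Service.loadCDSEntity():]

    private void loadWithThrottledProgress(BulkLoadFileHistory history, List<Gff3DTO> records, ProcessDisplayHelper ph) {
        int updateThreshhold = 1000; // flush progress roughly every N records instead of every record
        int updateCounter = 0;
        for (Gff3DTO record : records) {
            processOne(record); // placeholder for the per-record work
            history.incrementCompleted();
            if (updateCounter++ % updateThreshhold == 0) {
                updateHistory(history); // periodic merge keeps database round-trips off the hot path
            }
            ph.progressProcess();
        }
        updateHistory(history); // final flush so the last partial batch is persisted
    }

+    private boolean runLoad(BulkLoadFileHistory history, List<String> gffHeaderData, List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData, List<Long> idsAdded, BackendBulkDataProvider dataProvider) {
+
+        ProcessDisplayHelper ph = new ProcessDisplayHelper();
+        ph.addDisplayHandler(loadProcessDisplayService);
+        ph.startProcess("GFF update for " + dataProvider.name(), (gffData.size() * 3) + 1);
+
+        loadCDSEntities(history, gffData,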
idsAdded, dataProvider, ph); + + ph.finishProcess(); + + return true; + } + + public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { + List idsAdded = new ArrayList<>(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1); + + runLoad(history, null, preProcessedGffData, idsAdded, dataProvider); + history.finishLoad(); + + return new LoadHistoryResponce(history); + } + + + private void loadCDSEntities(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { + int updateThreshhold = 1000; // Aim for every 5 seconds (r/s * 5 = this number) + int updateCounter = 0; + for (ImmutablePair> gff3EntryPair : gffData) { + try { + gff3Service.loadCDSEntity(history, gff3EntryPair, idsAdded, dataProvider); + history.incrementCompleted(); + } catch (ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); + } + if (updateCounter++ % updateThreshhold == 0) { + updateHistory(history); + } + ph.progressProcess(); + } + updateHistory(history); + } + + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java index 0b6f1efb1..cd537d0d9 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java @@ -1,262 +1,244 @@ package org.alliancegenome.curation_api.jobs.executors; -import java.io.FileInputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.zip.GZIPInputStream; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; -import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; -import org.alliancegenome.curation_api.response.APIResponse; -import org.alliancegenome.curation_api.response.LoadHistoryResponce; -import org.alliancegenome.curation_api.services.CodingSequenceService; -import org.alliancegenome.curation_api.services.ExonService; -import org.alliancegenome.curation_api.services.Gff3Service; -import org.alliancegenome.curation_api.services.TranscriptService; -import org.alliancegenome.curation_api.services.associations.codingSequenceAssociations.CodingSequenceGenomicLocationAssociationService; -import org.alliancegenome.curation_api.services.associations.exonAssociations.ExonGenomicLocationAssociationService; -import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptCodingSequenceAssociationService; -import 
org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptExonAssociationService; -import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGeneAssociationService; -import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGenomicLocationAssociationService; import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; -import com.fasterxml.jackson.databind.MappingIterator; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; - import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; @ApplicationScoped public class Gff3Executor extends LoadFileExecutor { - @Inject Gff3Service gff3Service; - @Inject ExonService exonService; - @Inject CodingSequenceService cdsService; - @Inject TranscriptService transcriptService; - @Inject ExonGenomicLocationAssociationService exonLocationService; - @Inject CodingSequenceGenomicLocationAssociationService cdsLocationService; - @Inject TranscriptGenomicLocationAssociationService transcriptLocationService; - @Inject TranscriptGeneAssociationService transcriptGeneService; - @Inject TranscriptExonAssociationService transcriptExonService; - @Inject TranscriptCodingSequenceAssociationService transcriptCdsService; +// @Inject Gff3Service gff3Service; +// @Inject ExonService exonService; +// @Inject CodingSequenceService cdsService; +// @Inject TranscriptService transcriptService; +// @Inject ExonGenomicLocationAssociationService exonLocationService; +// @Inject CodingSequenceGenomicLocationAssociationService cdsLocationService; +// @Inject TranscriptGenomicLocationAssociationService transcriptLocationService; +// @Inject TranscriptGeneAssociationService transcriptGeneService; +// @Inject TranscriptExonAssociationService transcriptExonService; +// @Inject TranscriptCodingSequenceAssociationService transcriptCdsService; - public void execLoad(BulkLoadFile bulkLoadFile) { - try { - - CsvSchema gff3Schema = CsvSchemaBuilder.gff3Schema(); - CsvMapper csvMapper = new CsvMapper(); - MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(Gff3DTO.class).with(gff3Schema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath()))); - List gffData = it.readAll(); - List gffHeaderData = new ArrayList<>(); - for (Gff3DTO gffLine : gffData) { - if (gffLine.getSeqId().startsWith("#")) { - gffHeaderData.add(gffLine.getSeqId()); - } else { - break; - } - } - gffData.subList(0, gffHeaderData.size()).clear(); - - BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFile.getBulkLoad(); - BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - - gffData.clear(); - - Map> idsAdded = createIdsAddedMap(); - Map> previousIds = getPreviouslyLoadedIds(dataProvider); - - BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1); - createHistory(history, bulkLoadFile); - idsAdded = runLoad(history, gffHeaderData, preProcessedGffData, idsAdded, dataProvider); - runCleanup(transcriptService, history, dataProvider.name(), previousIds.get("Transcript"), idsAdded.get("Transcript"), "GFF transcript", 
bulkLoadFile.getMd5Sum()); - runCleanup(exonService, history, dataProvider.name(), previousIds.get("Exon"), idsAdded.get("Exon"), "GFF exon", bulkLoadFile.getMd5Sum()); - runCleanup(cdsService, history, dataProvider.name(), previousIds.get("CodingSequence"), idsAdded.get("CodingSequence"), "GFF coding sequence", bulkLoadFile.getMd5Sum()); - runCleanup(transcriptLocationService, history, dataProvider.name(), previousIds.get("TranscriptGenomicLocationAssociation"), idsAdded.get("TranscriptGenomicLocationAssociation"), "GFF transcript genomic location association", bulkLoadFile.getMd5Sum()); - runCleanup(exonLocationService, history, dataProvider.name(), previousIds.get("ExonGenomicLocationAssociation"), idsAdded.get("ExonGenomicLocationAssociation"), "GFF exon genomic location association", bulkLoadFile.getMd5Sum()); - runCleanup(cdsLocationService, history, dataProvider.name(), previousIds.get("CodingSequenceGenomicLocationAssociation"), idsAdded.get("CodingSequenceGenomicLocationAssociation"), "GFF coding sequence genomic location association", bulkLoadFile.getMd5Sum()); - runCleanup(transcriptGeneService, history, dataProvider.name(), previousIds.get("TranscriptGeneAssociation"), idsAdded.get("TranscriptGeneAssociation"), "GFF transcript gene association", bulkLoadFile.getMd5Sum()); - runCleanup(transcriptExonService, history, dataProvider.name(), previousIds.get("TranscriptExonAssociation"), idsAdded.get("TranscriptExonAssociation"), "GFF transcript exon association", bulkLoadFile.getMd5Sum()); - runCleanup(transcriptCdsService, history, dataProvider.name(), previousIds.get("TranscriptCodingSequenceAssociation"), idsAdded.get("TranscriptCodingSequenceAssociation"), "GFF transcript coding sequence association", bulkLoadFile.getMd5Sum()); - - history.finishLoad(); - finalSaveHistory(history); - - } catch (Exception e) { - e.printStackTrace(); - } + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { +// try { +// +// CsvSchema gff3Schema = CsvSchemaBuilder.gff3Schema(); +// CsvMapper csvMapper = new CsvMapper(); +// MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(Gff3DTO.class).with(gff3Schema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath()))); +// List gffData = it.readAll(); +// List gffHeaderData = new ArrayList<>(); +// for (Gff3DTO gffLine : gffData) { +// if (gffLine.getSeqId().startsWith("#")) { +// gffHeaderData.add(gffLine.getSeqId()); +// } else { +// break; +// } +// } +// gffData.subList(0, gffHeaderData.size()).clear(); +// +// BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFile.getBulkLoad(); +// BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); +// +// List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); +// +// gffData.clear(); +// +// Map> idsAdded = createIdsAddedMap(); +// +// BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1); +// createHistory(history, bulkLoadFile); +// +// boolean success = runLoad(history, gffHeaderData, preProcessedGffData, idsAdded, dataProvider); +// +// if (success) { +// Map> previousIds = getPreviouslyLoadedIds(dataProvider); +// runCleanup(transcriptService, history, dataProvider.name(), previousIds.get("Transcript"), idsAdded.get("Transcript"), "GFF transcript", bulkLoadFile.getMd5Sum()); +// runCleanup(exonService, history, dataProvider.name(), previousIds.get("Exon"), idsAdded.get("Exon"), "GFF exon", bulkLoadFile.getMd5Sum()); +// 
runCleanup(cdsService, history, dataProvider.name(), previousIds.get("CodingSequence"), idsAdded.get("CodingSequence"), "GFF coding sequence", bulkLoadFile.getMd5Sum()); +// runCleanup(transcriptLocationService, history, dataProvider.name(), previousIds.get("TranscriptGenomicLocationAssociation"), idsAdded.get("TranscriptGenomicLocationAssociation"), "GFF transcript genomic location association", bulkLoadFile.getMd5Sum()); +// runCleanup(exonLocationService, history, dataProvider.name(), previousIds.get("ExonGenomicLocationAssociation"), idsAdded.get("ExonGenomicLocationAssociation"), "GFF exon genomic location association", bulkLoadFile.getMd5Sum()); +// runCleanup(cdsLocationService, history, dataProvider.name(), previousIds.get("CodingSequenceGenomicLocationAssociation"), idsAdded.get("CodingSequenceGenomicLocationAssociation"), "GFF coding sequence genomic location association", bulkLoadFile.getMd5Sum()); +// runCleanup(transcriptGeneService, history, dataProvider.name(), previousIds.get("TranscriptGeneAssociation"), idsAdded.get("TranscriptGeneAssociation"), "GFF transcript gene association", bulkLoadFile.getMd5Sum()); +// runCleanup(transcriptExonService, history, dataProvider.name(), previousIds.get("TranscriptExonAssociation"), idsAdded.get("TranscriptExonAssociation"), "GFF transcript exon association", bulkLoadFile.getMd5Sum()); +// runCleanup(transcriptCdsService, history, dataProvider.name(), previousIds.get("TranscriptCodingSequenceAssociation"), idsAdded.get("TranscriptCodingSequenceAssociation"), "GFF transcript coding sequence association", bulkLoadFile.getMd5Sum()); +// } +// history.finishLoad(); +// finalSaveHistory(history); +// +// } catch (Exception e) { +// e.printStackTrace(); +// } } - private Map> createIdsAddedMap() { - Map> idsAdded = new HashMap>(); - idsAdded.put("Transcript", new ArrayList()); - idsAdded.put("Exon", new ArrayList()); - idsAdded.put("CodingSequence", new ArrayList()); - idsAdded.put("TranscriptGenomicLocationAssociation", new ArrayList()); - idsAdded.put("ExonGenomicLocationAssociation", new ArrayList()); - idsAdded.put("CodingSequenceGenomicLocationAssociation", new ArrayList()); - idsAdded.put("TranscriptGeneAssociation", new ArrayList()); - idsAdded.put("TranscriptExonAssociation", new ArrayList()); - idsAdded.put("TranscriptCodingSequenceAssociation", new ArrayList()); - - return idsAdded; - } +// private Map> createIdsAddedMap() { +// Map> idsAdded = new HashMap>(); +// idsAdded.put("Transcript", new ArrayList()); +// idsAdded.put("Exon", new ArrayList()); +// idsAdded.put("CodingSequence", new ArrayList()); +// idsAdded.put("TranscriptGenomicLocationAssociation", new ArrayList()); +// idsAdded.put("ExonGenomicLocationAssociation", new ArrayList()); +// idsAdded.put("CodingSequenceGenomicLocationAssociation", new ArrayList()); +// idsAdded.put("TranscriptGeneAssociation", new ArrayList()); +// idsAdded.put("TranscriptExonAssociation", new ArrayList()); +// idsAdded.put("TranscriptCodingSequenceAssociation", new ArrayList()); +// +// return idsAdded; +// } - private Map> getPreviouslyLoadedIds(BackendBulkDataProvider dataProvider) { - Map> previousIds = new HashMap<>(); - - previousIds.put("Transcript", transcriptService.getIdsByDataProvider(dataProvider)); - previousIds.put("Exon", exonService.getIdsByDataProvider(dataProvider)); - previousIds.put("CodingSequence", cdsService.getIdsByDataProvider(dataProvider)); - previousIds.put("TranscriptGenomicLocationAssociation", transcriptLocationService.getIdsByDataProvider(dataProvider)); - 
previousIds.put("ExonGenomicLocationAssociation", exonLocationService.getIdsByDataProvider(dataProvider)); - previousIds.put("CodingSequenceGenomicLocationAssociation", cdsLocationService.getIdsByDataProvider(dataProvider)); - previousIds.put("TranscriptGeneAssociation", transcriptGeneService.getIdsByDataProvider(dataProvider)); - previousIds.put("TranscriptExonAssociation", transcriptExonService.getIdsByDataProvider(dataProvider)); - previousIds.put("TranscriptCodingSequenceAssociation", transcriptCdsService.getIdsByDataProvider(dataProvider)); - - - return previousIds; - } +// private Map> getPreviouslyLoadedIds(BackendBulkDataProvider dataProvider) { +// Map> previousIds = new HashMap<>(); +// +// previousIds.put("Transcript", transcriptService.getIdsByDataProvider(dataProvider)); +// previousIds.put("Exon", exonService.getIdsByDataProvider(dataProvider)); +// previousIds.put("CodingSequence", cdsService.getIdsByDataProvider(dataProvider)); +// previousIds.put("TranscriptGenomicLocationAssociation", transcriptLocationService.getIdsByDataProvider(dataProvider)); +// previousIds.put("ExonGenomicLocationAssociation", exonLocationService.getIdsByDataProvider(dataProvider)); +// previousIds.put("CodingSequenceGenomicLocationAssociation", cdsLocationService.getIdsByDataProvider(dataProvider)); +// previousIds.put("TranscriptGeneAssociation", transcriptGeneService.getIdsByDataProvider(dataProvider)); +// previousIds.put("TranscriptExonAssociation", transcriptExonService.getIdsByDataProvider(dataProvider)); +// previousIds.put("TranscriptCodingSequenceAssociation", transcriptCdsService.getIdsByDataProvider(dataProvider)); +// +// +// return previousIds; +// } - private Map> runLoad(BulkLoadFileHistory history, List gffHeaderData, List>> gffData, - Map> idsAdded, BackendBulkDataProvider dataProvider) { - return runLoad(history, gffHeaderData, gffData, idsAdded, dataProvider, null); - } - - private Map> runLoad(BulkLoadFileHistory history, List gffHeaderData, List>> gffData, - Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) { - - ProcessDisplayHelper ph = new ProcessDisplayHelper(); - ph.addDisplayHandler(loadProcessDisplayService); - ph.startProcess("GFF update for " + dataProvider.name(), (gffData.size() * 3) + 1); - - assemblyId = loadGenomeAssembly(assemblyId, history, gffHeaderData, dataProvider, ph); - idsAdded = loadEntities(history, gffData, idsAdded, dataProvider, ph); - - Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData, dataProvider); - - idsAdded = loadLocationAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); - idsAdded = loadParentChildAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); - - ph.finishProcess(); - - return idsAdded; - } +// private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, List>> gffData, Map> idsAdded, BackendBulkDataProvider dataProvider) { +// return runLoad(history, gffHeaderData, gffData, idsAdded, dataProvider, null); +// } +// +// private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, List>> gffData, Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) { +// +// ProcessDisplayHelper ph = new ProcessDisplayHelper(); +// ph.addDisplayHandler(loadProcessDisplayService); +// ph.startProcess("GFF update for " + dataProvider.name(), (gffData.size() * 3) + 1); +// +// assemblyId = loadGenomeAssembly(assemblyId, history, gffHeaderData, dataProvider, ph); +// +// if (assemblyId == null) { +// 
failLoad(history.getBulkLoadFile(), new Exception("GFF Header does not contain assembly")); +// return false; +// } else { +// loadEntities(history, gffData, idsAdded, dataProvider, ph); +// Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); +// loadLocationAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); +// loadParentChildAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); +// } +// ph.finishProcess(); +// +// return true; +// } - public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { - Map> idsAdded = createIdsAddedMap(); - BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1); - - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); - history.finishLoad(); - - return new LoadHistoryResponce(history); - } - - private String loadGenomeAssembly(String assemblyName, BulkLoadFileHistory history, List gffHeaderData, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { - try { - assemblyName = gff3Service.loadGenomeAssembly(assemblyName, gffHeaderData, dataProvider); - history.incrementCompleted(); - } catch (ObjectUpdateException e) { - history.incrementFailed(); - addException(history, e.getData()); - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(gffHeaderData, e.getMessage(), e.getStackTrace())); - } - updateHistory(history); - ph.progressProcess(); - - return assemblyName; - } +// public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { +// Map> idsAdded = createIdsAddedMap(); +// BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); +// List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); +// BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1); +// +// runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); +// history.finishLoad(); +// +// return new LoadHistoryResponce(history); +// } +// +// private String loadGenomeAssembly(String assemblyName, BulkLoadFileHistory history, List gffHeaderData, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { +// try { +// assemblyName = gff3Service.loadGenomeAssembly(assemblyName, gffHeaderData, dataProvider); +// history.incrementCompleted(); +// } catch (ObjectUpdateException e) { +// //e.printStackTrace(); +// history.incrementFailed(); +// addException(history, e.getData()); +// } catch (Exception e) { +// e.printStackTrace(); +// history.incrementFailed(); +// addException(history, new ObjectUpdateExceptionData(gffHeaderData, e.getMessage(), e.getStackTrace())); +// } +// updateHistory(history); +// ph.progressProcess(); +// +// return assemblyName; +// } - private Map> loadEntities(BulkLoadFileHistory history, List>> gffData, Map> idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { - int updateThreshhold = 500; // Aim for every 5 seconds (r/s * 5 = this number) - int updateCounter = 0; - for (ImmutablePair> gff3EntryPair : gffData) { - try { - idsAdded = gff3Service.loadEntity(history, gff3EntryPair, idsAdded, dataProvider); - history.incrementCompleted(); - } catch (ObjectUpdateException e) { - 
history.incrementFailed(); - addException(history, e.getData()); - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); - } - if (updateCounter++ % updateThreshhold == 0) { - updateHistory(history); - } - ph.progressProcess(); - } - updateHistory(history); - return idsAdded; - } - - private Map> loadLocationAssociations(BulkLoadFileHistory history, List>> gffData, Map> idsAdded, - BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { - - for (ImmutablePair> gff3EntryPair : gffData) { - try { - idsAdded = gff3Service.loadLocationAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); - history.incrementCompleted(); - } catch (ObjectUpdateException e) { - history.incrementFailed(); - addException(history, e.getData()); - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); - } - updateHistory(history); - ph.progressProcess(); - } - - return idsAdded; - } - - private Map> loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, Map> idsAdded, - BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { - - for (ImmutablePair> gff3EntryPair : gffData) { - try { - idsAdded = gff3Service.loadParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); - history.incrementCompleted(); - } catch (ObjectUpdateException e) { - history.incrementFailed(); - addException(history, e.getData()); - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); - } - updateHistory(history); - ph.progressProcess(); - } - - return idsAdded; - } +// private void loadEntities(BulkLoadFileHistory history, List>> gffData, Map> idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { +// int updateThreshhold = 1000; // Aim for every 5 seconds (r/s * 5 = this number) +// int updateCounter = 0; +// for (ImmutablePair> gff3EntryPair : gffData) { +// try { +// gff3Service.loadEntity(history, gff3EntryPair, idsAdded, dataProvider); +// history.incrementCompleted(); +// } catch (ObjectUpdateException e) { +// history.incrementFailed(); +// addException(history, e.getData()); +// } catch (Exception e) { +// e.printStackTrace(); +// history.incrementFailed(); +// addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); +// } +// if (updateCounter++ % updateThreshhold == 0) { +// updateHistory(history); +// } +// ph.progressProcess(); +// } +// updateHistory(history); +// +// } +// +// private void loadLocationAssociations(BulkLoadFileHistory history, List>> gffData, Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { +// int updateThreshhold = 1000; // Aim for every 5 seconds (r/s * 5 = this number) +// int updateCounter = 0; +// for (ImmutablePair> gff3EntryPair : gffData) { +// try { +// gff3Service.loadLocationAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); +// history.incrementCompleted(); +// } catch (ObjectUpdateException e) { +// e.printStackTrace(); +// 
history.incrementFailed(); +// addException(history, e.getData()); +// } catch (Exception e) { +// e.printStackTrace(); +// history.incrementFailed(); +// addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); +// } +// if (updateCounter++ % updateThreshhold == 0) { +// updateHistory(history); +// } +// ph.progressProcess(); +// } +// updateHistory(history); +// +// } +// +// private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, Map> idsAdded, +// BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { +// +// for (ImmutablePair> gff3EntryPair : gffData) { +// try { +// gff3Service.loadParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); +// history.incrementCompleted(); +// } catch (ObjectUpdateException e) { +// history.incrementFailed(); +// addException(history, e.getData()); +// } catch (Exception e) { +// e.printStackTrace(); +// history.incrementFailed(); +// addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); +// } +// updateHistory(history); +// ph.progressProcess(); +// } +// +// } - private List>> preProcessGffData(List gffData, BackendBulkDataProvider dataProvider) { + protected List>> preProcessGffData(List gffData, BackendBulkDataProvider dataProvider) { List>> processedGffData = new ArrayList<>(); ProcessDisplayHelper ph = new ProcessDisplayHelper(); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java new file mode 100644 index 000000000..f635fda48 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java @@ -0,0 +1,130 @@ +package org.alliancegenome.curation_api.jobs.executors; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; +import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; +import org.alliancegenome.curation_api.services.ExonService; +import org.alliancegenome.curation_api.services.Gff3Service; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; +import org.apache.commons.lang3.tuple.ImmutablePair; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class Gff3ExonExecutor extends Gff3Executor { + + @Inject Gff3Service gff3Service; + @Inject ExonService exonService; + + @Override + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) 
{
+        try {
+
+            CsvSchema gff3Schema = CsvSchemaBuilder.gff3Schema();
+            CsvMapper csvMapper = new CsvMapper();
+            MappingIterator<Gff3DTO> it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(Gff3DTO.class).with(gff3Schema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())));
+            List<Gff3DTO> gffData = it.readAll();
+            List<String> gffHeaderData = new ArrayList<>();
+            for (Gff3DTO gffLine : gffData) {
+                if (gffLine.getSeqId().startsWith("#")) {
+                    gffHeaderData.add(gffLine.getSeqId());
+                } else {
+                    break;
+                }
+            }
+            gffData.subList(0, gffHeaderData.size()).clear();
+
+            BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
+            BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType());
+
+            List<ImmutablePair<Gff3DTO, Map<String, String>>> preProcessedGffData = preProcessGffData(gffData, dataProvider);
+
+            gffData.clear();
+
+            List<Long> idsAdded = new ArrayList<>();
+
+            bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size());
+            updateHistory(bulkLoadFileHistory);
+
+            boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider);
+            if (success) {
+                runCleanup(exonService, bulkLoadFileHistory, dataProvider.name(), exonService.getIdsByDataProvider(dataProvider), idsAdded, "GFF exon");
+            }
+            bulkLoadFileHistory.finishLoad();
+            finalSaveHistory(bulkLoadFileHistory);
+
+        } catch (Exception e) {
+            failLoad(bulkLoadFileHistory, e);
+            e.printStackTrace();
+        }
+    }
+
+
+    private boolean runLoad(BulkLoadFileHistory history, List<String> gffHeaderData, List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData, List<Long> idsAdded, BackendBulkDataProvider dataProvider) {
+
+        ProcessDisplayHelper ph = new ProcessDisplayHelper();
+        ph.addDisplayHandler(loadProcessDisplayService);
+        ph.startProcess("GFF Exon update for " + dataProvider.name(), (gffData.size() * 3) + 1);
+
+        loadExonEntities(history, gffData, idsAdded, dataProvider, ph);
+
+        ph.finishProcess();
+
+        return true;
+    }
+
+    public APIResponse runLoadApi(String dataProviderName, String assemblyName, List<Gff3DTO> gffData) {
+        List<Long> idsAdded = new ArrayList<>();
+        BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName);
+        List<ImmutablePair<Gff3DTO, Map<String, String>>> preProcessedGffData = preProcessGffData(gffData, dataProvider);
+        BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1);
+
+        runLoad(history, null, preProcessedGffData, idsAdded, dataProvider);
+        history.finishLoad();
+
+        return new LoadHistoryResponce(history);
+    }
+
+
+    private void loadExonEntities(BulkLoadFileHistory history, List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData, List<Long> idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) {
+        int updateThreshhold = 1000; // Aim for every 5 seconds (r/s * 5 = this number)
+        int updateCounter = 0;
+        for (ImmutablePair<Gff3DTO, Map<String, String>> gff3EntryPair : gffData) {
+            try {
+                gff3Service.loadExonEntity(history, gff3EntryPair, idsAdded, dataProvider);
+                history.incrementCompleted();
+            } catch (ObjectUpdateException e) {
+                history.incrementFailed();
+                addException(history, e.getData());
+            } catch (Exception e) {
+                e.printStackTrace();
+                history.incrementFailed();
+                addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace()));
+            }
+            if (updateCounter++ % updateThreshhold == 0) {
+                updateHistory(history);
+            }
+            ph.progressProcess();
+        }
+        updateHistory(history);
+
+    }
+
+}
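[Review note (illustrative sketch, not part of the patch): runCleanup(), reworked in LoadFileExecutor below, deprecates or deletes every record that a previous load created for this provider but the current load did not touch; the md5sum used in the audit description is now read from the history's file instead of being passed in. A sketch of that contract, assuming deprecateOrDelete(Long, Boolean, String, Boolean) and the history counters shown later in this patch; the set-difference lines are a plausible equivalent of unchanged code outside these hunks, and per-id failure counting is elided:]

    private void cleanupRemovedExons(ExonService service, BulkLoadFileHistory history, String dataProviderName, List<Long> idsBefore, List<Long> idsAfter) {
        List<Long> idsToRemove = new ArrayList<>(idsBefore);
        idsToRemove.removeAll(idsAfter); // anything loaded previously but not re-seen in this file
        long existingDeletes = history.getTotalDeleteRecords() == null ? 0 : history.getTotalDeleteRecords();
        history.setTotalDeleteRecords(idsToRemove.size() + existingDeletes);
        for (Long id : idsToRemove) {
            String loadDescription = dataProviderName + " GFF exon bulk load (" + history.getBulkLoadFile().getMd5Sum() + ")";
            service.deprecateOrDelete(id, false, loadDescription, true); // true = deprecate rather than hard-delete
            history.incrementDeleted();
        }
    }

diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java
new file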
mode 100644
index 000000000..635c6bbb6
--- /dev/null
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java
@@ -0,0 +1,125 @@
+package org.alliancegenome.curation_api.jobs.executors;
+
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
+import org.alliancegenome.curation_api.exceptions.ObjectUpdateException;
+import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData;
+import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
+import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO;
+import org.alliancegenome.curation_api.response.APIResponse;
+import org.alliancegenome.curation_api.response.LoadHistoryResponce;
+import org.alliancegenome.curation_api.services.Gff3Service;
+import org.alliancegenome.curation_api.services.TranscriptService;
+import org.alliancegenome.curation_api.util.ProcessDisplayHelper;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+
+import com.fasterxml.jackson.databind.MappingIterator;
+import com.fasterxml.jackson.dataformat.csv.CsvMapper;
+import com.fasterxml.jackson.dataformat.csv.CsvParser;
+import com.fasterxml.jackson.dataformat.csv.CsvSchema;
+
+import jakarta.enterprise.context.ApplicationScoped;
+import jakarta.inject.Inject;
+
+@ApplicationScoped
+public class Gff3TranscriptExecutor extends Gff3Executor {
+
+    @Inject Gff3Service gff3Service;
+    @Inject TranscriptService transcriptService;
+
+    @Override
+    public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
+        try {
+
+            CsvSchema gff3Schema = CsvSchemaBuilder.gff3Schema();
+            CsvMapper csvMapper = new CsvMapper();
+            MappingIterator<Gff3DTO> it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(Gff3DTO.class).with(gff3Schema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())));
+            List<Gff3DTO> gffData = it.readAll();
+            List<String> gffHeaderData = new ArrayList<>();
+            for (Gff3DTO gffLine : gffData) {
+                if (gffLine.getSeqId().startsWith("#")) {
+                    gffHeaderData.add(gffLine.getSeqId());
+                } else {
+                    break;
+                }
+            }
+            gffData.subList(0, gffHeaderData.size()).clear();
+
+            BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
+            BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType());
+
+            List<ImmutablePair<Gff3DTO, Map<String, String>>> preProcessedGffData = preProcessGffData(gffData, dataProvider);
+
+            gffData.clear();
+
+            List<Long> idsAdded = new ArrayList<>();
+
+            bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size());
+            updateHistory(bulkLoadFileHistory);
+
+            boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider);
+
+            if (success) {
+                runCleanup(transcriptService, bulkLoadFileHistory, dataProvider.name(), transcriptService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript");
+            }
+            bulkLoadFileHistory.finishLoad();
+            finalSaveHistory(bulkLoadFileHistory);
+
+        } catch (Exception e) {
+            failLoad(bulkLoadFileHistory, e);
+            e.printStackTrace();
+        }
+    }
+
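[Review note (illustrative arithmetic, not part of the patch): the (size * 3) + 1 sizing that runLoad() below passes to startProcess() comes from the original combined GFF flow, still visible commented out in Gff3Executor above: one genome-assembly header step plus three passes over the records (entities, genomic-location associations, parent/child associations). The split executors appear to keep that sizing even though each makes a single pass, so their progress displays are conservative. A worked example, as a fragment of the runLoad() context where ph and dataProvider are in scope:]

    int records = 10_000;          // example record count
    int steps = (records * 3) + 1; // = 30_001 progress slots
    ph.startProcess("GFF update for " + dataProvider.name(), steps);

+    private boolean runLoad(BulkLoadFileHistory history, List<String> gffHeaderData, List<ImmutablePair<Gff3DTO, Map<String, String>>> gffData, List<Long> idsAdded, BackendBulkDataProvider dataProvider) {
+
+        ProcessDisplayHelper ph = new ProcessDisplayHelper();
+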
ph.addDisplayHandler(loadProcessDisplayService); + ph.startProcess("GFF update for " + dataProvider.name(), (gffData.size() * 3) + 1); + + loadTranscriptEntities(history, gffData, idsAdded, dataProvider, ph); + + ph.finishProcess(); + + return true; + } + + public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { + List idsAdded = new ArrayList<>(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory((preProcessedGffData.size() * 3) + 1); + + runLoad(history, null, preProcessedGffData, idsAdded, dataProvider); + history.finishLoad(); + + return new LoadHistoryResponce(history); + } + + private void loadTranscriptEntities(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { + updateHistory(history); + for (ImmutablePair> gff3EntryPair : gffData) { + try { + gff3Service.loadTranscriptEntity(history, gff3EntryPair, idsAdded, dataProvider); + history.incrementCompleted(); + } catch (ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); + } + ph.progressProcess(); + } + updateHistory(history); + + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetAnnotationExecutor.java index 26f85c705..dd2b5fde1 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetAnnotationExecutor.java @@ -14,7 +14,6 @@ import org.alliancegenome.curation_api.model.entities.ExternalDataBaseEntity; import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetAnnotation; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetAnnotationFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetAnnotationIngestFmsDTO; @@ -34,28 +33,31 @@ public class HTPExpressionDatasetAnnotationExecutor extends LoadFileExecutor { @Inject HTPExpressionDatasetAnnotationService htpExpressionDatasetAnnotationService; @Inject HTPExpressionDatasetAnnotationDAO htpExpressionDatasetAnnotationDAO; - public void execLoad(BulkLoadFile bulkLoadFile) { + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { - BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFile.getBulkLoad(); + BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); - HTPExpressionDatasetAnnotationIngestFmsDTO htpExpressionDatasetData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), HTPExpressionDatasetAnnotationIngestFmsDTO.class); - bulkLoadFile.setRecordCount(htpExpressionDatasetData.getData().size()); + HTPExpressionDatasetAnnotationIngestFmsDTO htpExpressionDatasetData = mapper.readValue(new GZIPInputStream(new 
FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), HTPExpressionDatasetAnnotationIngestFmsDTO.class); + bulkLoadFileHistory.getBulkLoadFile().setRecordCount(htpExpressionDatasetData.getData().size()); AGRCurationSchemaVersion version = HTPExpressionDatasetAnnotation.class.getAnnotation(AGRCurationSchemaVersion.class); - bulkLoadFile.setLinkMLSchemaVersion(version.max()); + bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max()); if (htpExpressionDatasetData.getMetaData() != null && StringUtils.isNotBlank(htpExpressionDatasetData.getMetaData().getRelease())) { - bulkLoadFile.setAllianceMemberReleaseVersion(htpExpressionDatasetData.getMetaData().getRelease()); + bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(htpExpressionDatasetData.getMetaData().getRelease()); } BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); - bulkLoadFileDAO.merge(bulkLoadFile); + bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); + List datasetIdsLoaded = new ArrayList<>(); - BulkLoadFileHistory history = new BulkLoadFileHistory(htpExpressionDatasetData.getData().size()); - createHistory(history, bulkLoadFile); - boolean result = runLoaddatasetid(externalDataBaseEntityService, history, dataProvider, htpExpressionDatasetData.getData(), datasetIdsLoaded, false); + + bulkLoadFileHistory.setTotalRecords((long) htpExpressionDatasetData.getData().size()); + updateHistory(bulkLoadFileHistory); + + boolean result = runLoaddatasetid(externalDataBaseEntityService, bulkLoadFileHistory, dataProvider, htpExpressionDatasetData.getData(), datasetIdsLoaded, false); } catch (Exception e) { - failLoad(bulkLoadFile, e); + failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } } @@ -79,6 +81,7 @@ private boolean runLoaddatasetid(ExternalDataBaseEntityService externalDataBaseE addException(history, new ObjectUpdateExceptionData(dto, e.getMessage(), e.getStackTrace())); } } + updateHistory(history); ph.finishProcess(); return true; } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index 827693c81..5765f4391 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -20,7 +20,6 @@ import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; import org.alliancegenome.curation_api.jobs.util.SlackNotifier; import org.alliancegenome.curation_api.model.entities.base.AuditedObject; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileException; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.IngestDTO; @@ -50,17 +49,6 @@ public class LoadFileExecutor { @Inject APIVersionInfoService apiVersionInfoService; @Inject SlackNotifier slackNotifier; - protected void createHistory(BulkLoadFileHistory history, BulkLoadFile bulkLoadFile) { - if (bulkLoadFile != null) { - history.setBulkLoadFile(bulkLoadFile); - } - bulkLoadFileHistoryDAO.persist(history); - if (bulkLoadFile != null) { - bulkLoadFile.getHistory().add(history); - bulkLoadFileDAO.merge(bulkLoadFile); - } - } - protected void updateHistory(BulkLoadFileHistory history) { 
bulkLoadFileHistoryDAO.merge(history); } @@ -108,39 +96,41 @@ private List getVersionParts(String version) { return intParts; } - protected boolean checkSchemaVersion(BulkLoadFile bulkLoadFile, Class dtoClass) { - if (bulkLoadFile.getLinkMLSchemaVersion() == null) { - bulkLoadFile.setErrorMessage("Missing Schema Version"); - bulkLoadFile.setBulkloadStatus(JobStatus.FAILED); - slackNotifier.slackalert(bulkLoadFile); - bulkLoadFileDAO.merge(bulkLoadFile); + protected boolean checkSchemaVersion(BulkLoadFileHistory bulkLoadFileHistory, Class dtoClass) { + if (bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion() == null) { + bulkLoadFileHistory.setErrorMessage("Missing Schema Version"); + bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED); + slackNotifier.slackalert(bulkLoadFileHistory); + bulkLoadFileHistoryDAO.merge(bulkLoadFileHistory); return false; } - if (!validSchemaVersion(bulkLoadFile.getLinkMLSchemaVersion(), dtoClass)) { - bulkLoadFile.setErrorMessage("Invalid Schema Version: " + bulkLoadFile.getLinkMLSchemaVersion()); - bulkLoadFile.setBulkloadStatus(JobStatus.FAILED); - slackNotifier.slackalert(bulkLoadFile); - bulkLoadFileDAO.merge(bulkLoadFile); + if (!validSchemaVersion(bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion(), dtoClass)) { + bulkLoadFileHistory.setErrorMessage("Invalid Schema Version: " + bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion()); + bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED); + slackNotifier.slackalert(bulkLoadFileHistory); + bulkLoadFileHistoryDAO.merge(bulkLoadFileHistory); return false; } return true; } - protected IngestDTO readIngestFile(BulkLoadFile bulkLoadFile, Class dtoClass) { + protected IngestDTO readIngestFile(BulkLoadFileHistory bulkLoadFileHistory, Class dtoClass) { try { - IngestDTO ingestDto = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), IngestDTO.class); - bulkLoadFile.setLinkMLSchemaVersion(getVersionNumber(ingestDto.getLinkMLVersion())); + IngestDTO ingestDto = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), IngestDTO.class); + bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(getVersionNumber(ingestDto.getLinkMLVersion())); if (StringUtils.isNotBlank(ingestDto.getAllianceMemberReleaseVersion())) { - bulkLoadFile.setAllianceMemberReleaseVersion(ingestDto.getAllianceMemberReleaseVersion()); + bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(ingestDto.getAllianceMemberReleaseVersion()); } - - if (!checkSchemaVersion(bulkLoadFile, dtoClass)) { + + bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); + + if (!checkSchemaVersion(bulkLoadFileHistory, dtoClass)) { return null; } return ingestDto; } catch (Exception e) { - failLoad(bulkLoadFile, e); + failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } return null; @@ -214,7 +204,8 @@ protected boolean runLoad(BaseUpser loadMessage = loadMessage + " for " + dataProvider.name(); } ph.startProcess(loadMessage, objectList.size()); - + + updateHistory(history); for (T dtoObject : objectList) { try { E dbObject = service.upsert(dtoObject, dataProvider); @@ -234,23 +225,24 @@ protected boolean runLoad(BaseUpser if (terminateFailing && history.getErrorRate() > 0.25) { Log.error("Failure Rate > 25% aborting load"); finalSaveHistory(history); - failLoadAboveErrorRateCutoff(history.getBulkLoadFile()); + failLoadAboveErrorRateCutoff(history); return false; } - updateHistory(history); 
ph.progressProcess(); } + updateHistory(history); + ph.finishProcess(); } return true; } - protected > void runCleanup(S service, BulkLoadFileHistory history, String dataProviderName, List annotationIdsBefore, List annotationIdsAfter, String loadTypeString, String md5sum) { - runCleanup(service, history, dataProviderName, annotationIdsBefore, annotationIdsAfter, loadTypeString, md5sum, true); + protected > void runCleanup(S service, BulkLoadFileHistory history, String dataProviderName, List annotationIdsBefore, List annotationIdsAfter, String loadTypeString) { + runCleanup(service, history, dataProviderName, annotationIdsBefore, annotationIdsAfter, loadTypeString, true); } // The following methods are for bulk validation - protected > void runCleanup(S service, BulkLoadFileHistory history, String dataProviderName, List annotationIdsBefore, List annotationIdsAfter, String loadTypeString, String md5sum, Boolean deprecate) { + protected > void runCleanup(S service, BulkLoadFileHistory history, String dataProviderName, List annotationIdsBefore, List annotationIdsAfter, String loadTypeString, Boolean deprecate) { Log.debug("runLoad: After: " + dataProviderName + " " + annotationIdsAfter.size()); List distinctAfter = annotationIdsAfter.stream().distinct().collect(Collectors.toList()); @@ -260,26 +252,27 @@ protected boolean runLoad(BaseUpser Log.debug("runLoad: Remove: " + dataProviderName + " " + idsToRemove.size()); long existingDeletes = history.getTotalDeleteRecords() == null ? 0 : history.getTotalDeleteRecords(); - history.setTotalDeleteRecords((long) idsToRemove.size() + existingDeletes); + history.setTotalDeleteRecords(idsToRemove.size() + existingDeletes); ProcessDisplayHelper ph = new ProcessDisplayHelper(10000); ph.startProcess("Deletion/deprecation of entities linked to unloaded " + dataProviderName, idsToRemove.size()); + updateHistory(history); for (Long id : idsToRemove) { try { - String loadDescription = dataProviderName + " " + loadTypeString + " bulk load (" + md5sum + ")"; + String loadDescription = dataProviderName + " " + loadTypeString + " bulk load (" + history.getBulkLoadFile().getMd5Sum() + ")"; service.deprecateOrDelete(id, false, loadDescription, deprecate); history.incrementDeleted(); } catch (Exception e) { history.incrementDeleteFailed(); addException(history, new ObjectUpdateExceptionData("{ \"id\": " + id + "}", e.getMessage(), e.getStackTrace())); } - updateHistory(history); ph.progressProcess(); } + updateHistory(history); ph.finishProcess(); } - protected void failLoad(BulkLoadFile bulkLoadFile, Exception e) { + protected void failLoad(BulkLoadFileHistory bulkLoadFileHistory, Exception e) { Set errorMessages = new LinkedHashSet(); errorMessages.add(e.getMessage()); errorMessages.add(e.getLocalizedMessage()); @@ -288,16 +281,16 @@ protected void failLoad(BulkLoadFile bulkLoadFile, Exception e) { errorMessages.add(cause.getMessage()); cause = cause.getCause(); } - bulkLoadFile.setErrorMessage(String.join("|", errorMessages)); - bulkLoadFile.setBulkloadStatus(JobStatus.FAILED); - slackNotifier.slackalert(bulkLoadFile); - bulkLoadFileDAO.merge(bulkLoadFile); + bulkLoadFileHistory.setErrorMessage(String.join("|", errorMessages)); + bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED); + slackNotifier.slackalert(bulkLoadFileHistory); + updateHistory(bulkLoadFileHistory); } - protected void failLoadAboveErrorRateCutoff(BulkLoadFile bulkLoadFile) { - bulkLoadFile.setBulkloadStatus(JobStatus.FAILED); - bulkLoadFile.setErrorMessage("Failure rate exceeded cutoff"); - 
slackNotifier.slackalert(bulkLoadFile); - bulkLoadFileDAO.merge(bulkLoadFile); + protected void failLoadAboveErrorRateCutoff(BulkLoadFileHistory bulkLoadFileHistory) { + bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED); + bulkLoadFileHistory.setErrorMessage("Failure rate exceeded cutoff"); + slackNotifier.slackalert(bulkLoadFileHistory); + updateHistory(bulkLoadFileHistory); } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/MoleculeExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/MoleculeExecutor.java index 8a8b58a52..9f8126459 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/MoleculeExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/MoleculeExecutor.java @@ -5,7 +5,6 @@ import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.Molecule; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.MoleculeIngestFmsDTO; import org.alliancegenome.curation_api.services.MoleculeService; @@ -19,24 +18,25 @@ public class MoleculeExecutor extends LoadFileExecutor { @Inject MoleculeService moleculeService; - public void execLoad(BulkLoadFile bulkLoadFile) { + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { - MoleculeIngestFmsDTO moleculeData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), MoleculeIngestFmsDTO.class); - bulkLoadFile.setRecordCount(moleculeData.getData().size()); - if (bulkLoadFile.getLinkMLSchemaVersion() == null) { + MoleculeIngestFmsDTO moleculeData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), MoleculeIngestFmsDTO.class); + bulkLoadFileHistory.getBulkLoadFile().setRecordCount(moleculeData.getData().size()); + if (bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion() == null) { AGRCurationSchemaVersion version = Molecule.class.getAnnotation(AGRCurationSchemaVersion.class); - bulkLoadFile.setLinkMLSchemaVersion(version.max()); + bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max()); } if (moleculeData.getMetaData() != null && StringUtils.isNotBlank(moleculeData.getMetaData().getRelease())) { - bulkLoadFile.setAllianceMemberReleaseVersion(moleculeData.getMetaData().getRelease()); + bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(moleculeData.getMetaData().getRelease()); } - bulkLoadFileDAO.merge(bulkLoadFile); + bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - BulkLoadFileHistory history = new BulkLoadFileHistory(moleculeData.getData().size()); - createHistory(history, bulkLoadFile); - runLoad(moleculeService, history, null, moleculeData.getData(), null); - history.finishLoad(); - finalSaveHistory(history); + bulkLoadFileHistory.setTotalRecords((long) moleculeData.getData().size()); + updateHistory(bulkLoadFileHistory); + + runLoad(moleculeService, bulkLoadFileHistory, null, moleculeData.getData(), null); + bulkLoadFileHistory.finishLoad(); + finalSaveHistory(bulkLoadFileHistory); } catch (Exception e) { e.printStackTrace(); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/OntologyExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/OntologyExecutor.java index cbd396d53..0956b296c 
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/OntologyExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/OntologyExecutor.java
index cbd396d53..0956b296c 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/OntologyExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/OntologyExecutor.java
@@ -1,14 +1,13 @@
 package org.alliancegenome.curation_api.jobs.executors;
 import java.io.FileInputStream;
-import java.time.OffsetDateTime;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.zip.GZIPInputStream;
 import org.alliancegenome.curation_api.dao.loads.BulkLoadFileDAO;
 import org.alliancegenome.curation_api.enums.OntologyBulkLoadType;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.ontology.OntologyTerm;
 import org.alliancegenome.curation_api.services.base.BaseOntologyTermService;
 import org.alliancegenome.curation_api.services.helpers.GenericOntologyLoadConfig;
@@ -21,9 +20,9 @@
 import org.alliancegenome.curation_api.services.ontology.CmoTermService;
 import org.alliancegenome.curation_api.services.ontology.DaoTermService;
 import org.alliancegenome.curation_api.services.ontology.DoTermService;
-import org.alliancegenome.curation_api.services.ontology.FbcvTermService;
 import org.alliancegenome.curation_api.services.ontology.EcoTermService;
 import org.alliancegenome.curation_api.services.ontology.EmapaTermService;
+import org.alliancegenome.curation_api.services.ontology.FbcvTermService;
 import org.alliancegenome.curation_api.services.ontology.FbdvTermService;
 import org.alliancegenome.curation_api.services.ontology.GenoTermService;
 import org.alliancegenome.curation_api.services.ontology.GoTermService;
@@ -112,20 +111,19 @@ public class OntologyExecutor {
 	@Inject BulkLoadFileDAO bulkLoadFileDAO;
 	@Inject LoadProcessDisplayService loadProcessDisplayService;
-	public void execLoad(BulkLoadFile bulkLoadFile) throws Exception {
-		bulkLoadFile.setRecordCount(0);
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws Exception {
 		GenericOntologyLoadConfig config = new GenericOntologyLoadConfig();
-		OntologyBulkLoadType ontologyType = bulkLoadFile.getBulkLoad().getOntologyType();
+		OntologyBulkLoadType ontologyType = bulkLoadFileHistory.getBulkLoad().getOntologyType();
 		switch (ontologyType) {
 			case ZECO -> {
 				config.setLoadOnlyIRIPrefix("ZECO");
-				processTerms(bulkLoadFile, zecoTermService, config);
+				processTerms(bulkLoadFileHistory, zecoTermService, config);
 			}
 			case EMAPA -> {
 				config.getAltNameSpaces().add("anatomical_structure");
-				processTerms(bulkLoadFile, emapaTermService, config);
+				processTerms(bulkLoadFileHistory, emapaTermService, config);
 			}
 			case GO -> {
 				config.setLoadOnlyIRIPrefix("GO"); // GO has to have both prefix and namespaces as obsolete terms do not show up in
@@ -133,137 +131,137 @@ public void execLoad(BulkLoadFile bulkLoadFile) throws Exception {
 				config.getAltNameSpaces().add("biological_process");
 				config.getAltNameSpaces().add("molecular_function");
 				config.getAltNameSpaces().add("cellular_component");
-				processTerms(bulkLoadFile, goTermService, config);
+				processTerms(bulkLoadFileHistory, goTermService, config);
 			}
-			case SO -> processTerms(bulkLoadFile, soTermService, config);
-			case XCO -> processTerms(bulkLoadFile, xcoTermService, config);
+			case SO -> processTerms(bulkLoadFileHistory, soTermService, config);
+			case XCO -> processTerms(bulkLoadFileHistory, xcoTermService, config);
 			case ECO -> {
-				processTerms(bulkLoadFile, ecoTermService, config);
+				processTerms(bulkLoadFileHistory, ecoTermService, config);
 				ecoTermService.updateAbbreviations();
 			}
 			case CHEBI -> {
 				config.setLoadOnlyIRIPrefix("CHEBI");
-				processTerms(bulkLoadFile, chebiTermService, config);
+				processTerms(bulkLoadFileHistory, chebiTermService, config);
 			}
 			case ZFA -> {
 				config.getAltNameSpaces().add("zebrafish_anatomy");
-				processTerms(bulkLoadFile, zfaTermService, config);
+				processTerms(bulkLoadFileHistory, zfaTermService, config);
 			}
-			case DO -> processTerms(bulkLoadFile, doTermService, config);
+			case DO -> processTerms(bulkLoadFileHistory, doTermService, config);
 			case MP -> {
 				config.setLoadOnlyIRIPrefix("MP");
-				processTerms(bulkLoadFile, mpTermService, config);
+				processTerms(bulkLoadFileHistory, mpTermService, config);
 			}
 			case RO -> {
 				config.setLoadObjectProperties(true);
 				config.setLoadOnlyIRIPrefix("RO");
-				processTerms(bulkLoadFile, roTermService, config);
+				processTerms(bulkLoadFileHistory, roTermService, config);
 			}
-			case MA -> processTerms(bulkLoadFile, maTermService, config);
-			case WBBT -> processTerms(bulkLoadFile, wbbtTermService, config);
+			case MA -> processTerms(bulkLoadFileHistory, maTermService, config);
+			case WBBT -> processTerms(bulkLoadFileHistory, wbbtTermService, config);
 			case DAO -> {
 				config.setLoadOnlyIRIPrefix("FBbt");
-				processTerms(bulkLoadFile, daoTermService, config);
+				processTerms(bulkLoadFileHistory, daoTermService, config);
 			}
-			case WBLS -> processTerms(bulkLoadFile, wblsTermService, config);
-			case FBDV -> processTerms(bulkLoadFile, fbdvTermService, config);
+			case WBLS -> processTerms(bulkLoadFileHistory, wblsTermService, config);
+			case FBDV -> processTerms(bulkLoadFileHistory, fbdvTermService, config);
 			case MMUSDV -> {
 				config.getAltNameSpaces().add("mouse_developmental_stage");
 				config.getAltNameSpaces().add("mouse_stages_ontology");
-				processTerms(bulkLoadFile, mmusdvTermService, config);
+				processTerms(bulkLoadFileHistory, mmusdvTermService, config);
 			}
-			case ZFS -> processTerms(bulkLoadFile, zfsTermService, config);
+			case ZFS -> processTerms(bulkLoadFileHistory, zfsTermService, config);
 			case XBA_XBS -> {
 				config.getAltNameSpaces().add("xenopus_anatomy");
 				config.getAltNameSpaces().add("xenopus_anatomy_in_vitro");
-				processTerms(bulkLoadFile, OntologyBulkLoadType.XBA, xbaTermService, config);
+				processTerms(bulkLoadFileHistory, OntologyBulkLoadType.XBA, xbaTermService, config);
 				GenericOntologyLoadConfig config2 = new GenericOntologyLoadConfig();
 				config2.getAltNameSpaces().add("xenopus_developmental_stage");
-				processTerms(bulkLoadFile, OntologyBulkLoadType.XBS, xbsTermService, config2);
+				processTerms(bulkLoadFileHistory, OntologyBulkLoadType.XBS, xbsTermService, config2);
 			}
 			case XPO -> {
 				config.setLoadOnlyIRIPrefix("XPO");
-				processTerms(bulkLoadFile, xpoTermService, config);
+				processTerms(bulkLoadFileHistory, xpoTermService, config);
 			}
 			case ATP -> {
 				config.setLoadOnlyIRIPrefix("ATP");
-				processTerms(bulkLoadFile, atpTermService, config);
+				processTerms(bulkLoadFileHistory, atpTermService, config);
 			}
-			case XBED -> processTerms(bulkLoadFile, xbedTermService, config);
-			case VT -> processTerms(bulkLoadFile, vtTermService, config);
-			case XSMO -> processTerms(bulkLoadFile, xsmoTermService, config);
+			case XBED -> processTerms(bulkLoadFileHistory, xbedTermService, config);
+			case VT -> processTerms(bulkLoadFileHistory, vtTermService, config);
+			case XSMO -> processTerms(bulkLoadFileHistory, xsmoTermService, config);
 			case OBI -> {
 				config.setLoadOnlyIRIPrefix("OBI");
-				processTerms(bulkLoadFile, obiTermService, config);
+				processTerms(bulkLoadFileHistory, obiTermService, config);
 			}
-			case WBPheno -> processTerms(bulkLoadFile, wbPhenotypeTermService, config);
-			case PATO -> processTerms(bulkLoadFile, patoTermService, config);
+			case WBPheno -> processTerms(bulkLoadFileHistory, wbPhenotypeTermService, config);
+			case PATO -> processTerms(bulkLoadFileHistory, patoTermService, config);
 			case HP -> {
 				config.setLoadOnlyIRIPrefix("HP");
-				processTerms(bulkLoadFile, hpTermService, config);
+				processTerms(bulkLoadFileHistory, hpTermService, config);
 			}
 			case FBCV -> {
 				config.setLoadOnlyIRIPrefix("FBcv");
-				processTerms(bulkLoadFile, fbcvTermService, config);
+				processTerms(bulkLoadFileHistory, fbcvTermService, config);
 			}
-			case MMO -> processTerms(bulkLoadFile, mmoTermService, config);
+			case MMO -> processTerms(bulkLoadFileHistory, mmoTermService, config);
 			case APO -> {
 				config.getAltNameSpaces().add("experiment_type");
 				config.getAltNameSpaces().add("mutant_type");
 				config.getAltNameSpaces().add("observable");
 				config.getAltNameSpaces().add("qualifier");
-				processTerms(bulkLoadFile, apoTermService, config);
+				processTerms(bulkLoadFileHistory, apoTermService, config);
 			}
-			case MI -> processTerms(bulkLoadFile, miTermService, config);
-			case MPATH -> processTerms(bulkLoadFile, mpathTermService, config);
-			case MOD -> processTerms(bulkLoadFile, modTermService, config);
+			case MI -> processTerms(bulkLoadFileHistory, miTermService, config);
+			case MPATH -> processTerms(bulkLoadFileHistory, mpathTermService, config);
+			case MOD -> processTerms(bulkLoadFileHistory, modTermService, config);
 			case UBERON -> {
 				config.setLoadOnlyIRIPrefix("UBERON");
-				processTerms(bulkLoadFile, uberonTermService, config);
+				processTerms(bulkLoadFileHistory, uberonTermService, config);
 			}
-			case RS -> processTerms(bulkLoadFile, rsTermService, config);
-			case PW -> processTerms(bulkLoadFile, pwTermService, config);
+			case RS -> processTerms(bulkLoadFileHistory, rsTermService, config);
+			case PW -> processTerms(bulkLoadFileHistory, pwTermService, config);
 			case CL -> {
 				config.setLoadOnlyIRIPrefix("CL");
-				processTerms(bulkLoadFile, clTermService, config);
+				processTerms(bulkLoadFileHistory, clTermService, config);
 			}
 			case CMO -> {
 				config.setLoadOnlyIRIPrefix("CMO");
-				processTerms(bulkLoadFile, cmoTermService, config);
+				processTerms(bulkLoadFileHistory, cmoTermService, config);
 			}
 			case BSPO -> {
 				config.setLoadOnlyIRIPrefix("BSPO");
-				processTerms(bulkLoadFile, bspoTermService, config);
+				processTerms(bulkLoadFileHistory, bspoTermService, config);
 			}
 			case GENO -> {
 				config.setLoadOnlyIRIPrefix("GENO");
-				processTerms(bulkLoadFile, genoTermService, config);
+				processTerms(bulkLoadFileHistory, genoTermService, config);
 			}
 			default -> {
-				log.info("Ontology Load: " + bulkLoadFile.getBulkLoad().getName() + " for OT: " + ontologyType + " not implemented");
-				throw new Exception("Ontology Load: " + bulkLoadFile.getBulkLoad().getName() + " for OT: " + ontologyType + " not implemented");
+				log.info("Ontology Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for OT: " + ontologyType + " not implemented");
+				throw new Exception("Ontology Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for OT: " + ontologyType + " not implemented");
 			}
 		}
 	}
-	private void processTerms(BulkLoadFile bulkLoadFile, BaseOntologyTermService service, GenericOntologyLoadConfig config) throws Exception {
-		processTerms(bulkLoadFile, bulkLoadFile.getBulkLoad().getOntologyType(), service, config);
+	private void processTerms(BulkLoadFileHistory bulkLoadFileHistory, BaseOntologyTermService service, GenericOntologyLoadConfig config) throws Exception {
+		processTerms(bulkLoadFileHistory, bulkLoadFileHistory.getBulkLoad().getOntologyType(), service, config);
 	}
-	private void processTerms(BulkLoadFile bulkLoadFile, OntologyBulkLoadType ontologyType, BaseOntologyTermService service, GenericOntologyLoadConfig config) throws Exception {
+	private void processTerms(BulkLoadFileHistory bulkLoadFileHistory, OntologyBulkLoadType ontologyType, BaseOntologyTermService service, GenericOntologyLoadConfig config) throws Exception {
 		GenericOntologyLoadHelper loader = new GenericOntologyLoadHelper<>(ontologyType.getClazz(), config);
-		Map termMap = loader.load(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())));
+		Map termMap = loader.load(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())));
-		bulkLoadFile.setRecordCount(bulkLoadFile.getRecordCount() + termMap.size());
+		bulkLoadFileHistory.getBulkLoadFile().setRecordCount(bulkLoadFileHistory.getBulkLoadFile().getRecordCount() + termMap.size());
-		bulkLoadFile.setDateLastLoaded(OffsetDateTime.now());
-		bulkLoadFileDAO.merge(bulkLoadFile);
+		bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
+
 		ProcessDisplayHelper ph = new ProcessDisplayHelper();
 		ph.addDisplayHandler(loadProcessDisplayService);
-		ph.startProcess(bulkLoadFile.getBulkLoad().getName() + ": " + ontologyType.getClazz().getSimpleName() + " Terms", termMap.size());
+		ph.startProcess(bulkLoadFileHistory.getBulkLoad().getName() + ": " + ontologyType.getClazz().getSimpleName() + " Terms", termMap.size());
 		for (Entry entry : termMap.entrySet()) {
 			service.processUpdate(entry.getValue());
 			ph.progressProcess();
@@ -272,7 +270,7 @@ private void processTerms(BulkLoadFile bulkLoadFile, OntologyBulkLoadType ontolo
 		ProcessDisplayHelper ph1 = new ProcessDisplayHelper();
 		ph.addDisplayHandler(loadProcessDisplayService);
-		ph1.startProcess(bulkLoadFile.getBulkLoad().getName() + ": " + ontologyType.getClazz().getSimpleName() + " Closure", termMap.size());
+		ph1.startProcess(bulkLoadFileHistory.getBulkLoad().getName() + ": " + ontologyType.getClazz().getSimpleName() + " Closure", termMap.size());
 		for (Entry entry : termMap.entrySet()) {
 			service.processUpdateRelationships(entry.getValue());
 			// Thread.sleep(5000);
@@ -282,7 +280,7 @@ private void processTerms(BulkLoadFile bulkLoadFile, OntologyBulkLoadType ontolo
 		ProcessDisplayHelper ph2 = new ProcessDisplayHelper();
 		ph.addDisplayHandler(loadProcessDisplayService);
-		ph2.startProcess(bulkLoadFile.getBulkLoad().getName() + ": " + ontologyType.getClazz().getSimpleName() + " Counts", termMap.size());
+		ph2.startProcess(bulkLoadFileHistory.getBulkLoad().getName() + ": " + ontologyType.getClazz().getSimpleName() + " Counts", termMap.size());
 		for (Entry entry : termMap.entrySet()) {
 			service.processCounts(entry.getValue());
 			// Thread.sleep(5000);
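processTerms() drives three passes over the parsed term map, each reported through its own ProcessDisplayHelper: term updates, closure (relationship) updates, and counts. One pre-existing quirk survives the patch: the display handler is registered on ph all three times, so ph1 and ph2 never receive it. A compressed sketch of the flow, keeping the raw collection types exactly as they appear in the extracted patch text and casting only for illustration:

    // Sketch of the three passes; service is the ontology-specific BaseOntologyTermService.
    Map termMap = loader.load(new GZIPInputStream(new FileInputStream(
            bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())));

    // Record counts accumulate rather than reset, which matters for XBA_XBS: it calls processTerms twice.
    BulkLoadFile file = bulkLoadFileHistory.getBulkLoadFile();
    file.setRecordCount(file.getRecordCount() + termMap.size());

    for (Object term : termMap.values()) { service.processUpdate((OntologyTerm) term); }              // pass 1: terms
    for (Object term : termMap.values()) { service.processUpdateRelationships((OntologyTerm) term); } // pass 2: closure
    for (Object term : termMap.values()) { service.processCounts((OntologyTerm) term); }              // pass 3: counts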
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/OrthologyExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/OrthologyExecutor.java
index bf36f066a..9066c010d 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/OrthologyExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/OrthologyExecutor.java
@@ -9,7 +9,6 @@
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
 import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.orthology.GeneToGeneOrthologyGenerated;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.OrthologyIngestFmsDTO;
@@ -27,17 +26,17 @@ public class OrthologyExecutor extends LoadFileExecutor {
 	@Inject GeneToGeneOrthologyGeneratedService generatedOrthologyService;
 	@Inject GeneToGeneOrthologyGeneratedDAO generatedOrthologyDAO;
-	public void execLoad(BulkLoadFile bulkLoadFile) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
 		try {
-			BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFile.getBulkLoad();
+			BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
-			OrthologyIngestFmsDTO orthologyData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), OrthologyIngestFmsDTO.class);
-			bulkLoadFile.setRecordCount(orthologyData.getData().size());
+			OrthologyIngestFmsDTO orthologyData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), OrthologyIngestFmsDTO.class);
+			bulkLoadFileHistory.getBulkLoadFile().setRecordCount(orthologyData.getData().size());
 			AGRCurationSchemaVersion version = GeneToGeneOrthologyGenerated.class.getAnnotation(AGRCurationSchemaVersion.class);
-			bulkLoadFile.setLinkMLSchemaVersion(version.max());
+			bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max());
 			if (orthologyData.getMetaData() != null && StringUtils.isNotBlank(orthologyData.getMetaData().getRelease())) {
-				bulkLoadFile.setAllianceMemberReleaseVersion(orthologyData.getMetaData().getRelease());
+				bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(orthologyData.getMetaData().getRelease());
 			}
 			List orthoPairIdsLoaded = new ArrayList<>();
@@ -45,18 +44,19 @@ public void execLoad(BulkLoadFile bulkLoadFile) {
 			List orthoPairIdsBefore = generatedOrthologyService.getAllOrthologyPairIdsBySubjectGeneDataProvider(dataProvider);
 			log.debug("runLoad: Before: total " + orthoPairIdsBefore.size());
-			bulkLoadFileDAO.merge(bulkLoadFile);
+			bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
-			BulkLoadFileHistory history = new BulkLoadFileHistory(orthologyData.getData().size());
-			createHistory(history, bulkLoadFile);
-			boolean success = runLoad(generatedOrthologyService, history, dataProvider, orthologyData.getData(), orthoPairIdsLoaded);
+			bulkLoadFileHistory.setTotalRecords((long) orthologyData.getData().size());
+			updateHistory(bulkLoadFileHistory);
+
+			boolean success = runLoad(generatedOrthologyService, bulkLoadFileHistory, dataProvider, orthologyData.getData(), orthoPairIdsLoaded);
 			if (success) {
-				runCleanup(generatedOrthologyService, history, fms.getFmsDataSubType(), orthoPairIdsBefore, orthoPairIdsLoaded, fms.getFmsDataType(), bulkLoadFile.getMd5Sum(), false);
+				runCleanup(generatedOrthologyService, bulkLoadFileHistory, fms.getFmsDataSubType(), orthoPairIdsBefore, orthoPairIdsLoaded, fms.getFmsDataType(), false);
 			}
-			history.finishLoad();
-			finalSaveHistory(history);
+			bulkLoadFileHistory.finishLoad();
+			finalSaveHistory(bulkLoadFileHistory);
 		} catch (Exception e) {
-			failLoad(bulkLoadFile, e);
+			failLoad(bulkLoadFileHistory, e);
 			e.printStackTrace();
 		}
 	}
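OrthologyExecutor shows the reconciliation pattern the FMS executors share: snapshot the IDs already stored for the data provider, collect the IDs seen during the load, and only run the stale-row cleanup when the load itself succeeded. Note the runCleanup signature change in this patch: the md5Sum argument is gone, since the history handed in already knows its file. A sketch using the names from the hunk above (raw List types kept as extracted):

    List orthoPairIdsBefore = generatedOrthologyService
            .getAllOrthologyPairIdsBySubjectGeneDataProvider(dataProvider); // snapshot before the load
    List orthoPairIdsLoaded = new ArrayList<>();                            // filled in by runLoad

    boolean success = runLoad(generatedOrthologyService, bulkLoadFileHistory,
            dataProvider, orthologyData.getData(), orthoPairIdsLoaded);
    if (success) {
        // IDs present before but absent from the new file are removed.
        runCleanup(generatedOrthologyService, bulkLoadFileHistory, fms.getFmsDataSubType(),
                orthoPairIdsBefore, orthoPairIdsLoaded, fms.getFmsDataType(), false);
    }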
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java
index f2cbf69c6..a1daf443e 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java
@@ -10,7 +10,6 @@
 import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
 import org.alliancegenome.curation_api.model.entities.GeneToGeneParalogy;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.ParalogyIngestFmsDTO;
 import org.alliancegenome.curation_api.services.GeneToGeneParalogyService;
@@ -26,17 +25,17 @@ public class ParalogyExecutor extends LoadFileExecutor {
 	@Inject GeneToGeneParalogyService geneToGeneParalogyService;
 	@Inject GeneToGeneParalogyDAO geneToGeneParalogyDAO;
-	public void execLoad(BulkLoadFile bulkLoadFile) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
 		try {
-			BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFile.getBulkLoad();
+			BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
-			ParalogyIngestFmsDTO paralogyData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), ParalogyIngestFmsDTO.class);
-			bulkLoadFile.setRecordCount(paralogyData.getData().size());
+			ParalogyIngestFmsDTO paralogyData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), ParalogyIngestFmsDTO.class);
+			bulkLoadFileHistory.getBulkLoadFile().setRecordCount(paralogyData.getData().size());
 			AGRCurationSchemaVersion version = GeneToGeneParalogy.class.getAnnotation(AGRCurationSchemaVersion.class);
-			bulkLoadFile.setLinkMLSchemaVersion(version.max());
+			bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max());
 			if (paralogyData.getMetaData() != null && StringUtils.isNotBlank(paralogyData.getMetaData().getRelease())) {
-				bulkLoadFile.setAllianceMemberReleaseVersion(paralogyData.getMetaData().getRelease());
+				bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(paralogyData.getMetaData().getRelease());
 			}
 			List paralogyIdsLoaded = new ArrayList<>();
@@ -44,21 +43,22 @@ public void execLoad(BulkLoadFile bulkLoadFile) {
 			List paralogyPairsBefore = geneToGeneParalogyService.getAllParalogyPairIdsBySubjectGeneDataProvider(dataProvider);
 			Log.debug("runLoad: Before: total " + paralogyPairsBefore.size());
-			bulkLoadFileDAO.merge(bulkLoadFile);
+			bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
-			BulkLoadFileHistory history = new BulkLoadFileHistory(paralogyData.getData().size());
-			createHistory(history, bulkLoadFile);
-			boolean success = runLoad(geneToGeneParalogyService, history, dataProvider, paralogyData.getData(), paralogyIdsLoaded, false);
+			bulkLoadFileHistory.setTotalRecords((long) paralogyData.getData().size());
+			updateHistory(bulkLoadFileHistory);
+
+			boolean success = runLoad(geneToGeneParalogyService, bulkLoadFileHistory, dataProvider, paralogyData.getData(), paralogyIdsLoaded, false);
 			if (success) {
-				runCleanup(geneToGeneParalogyService, history, fms.getFmsDataSubType(), paralogyPairsBefore, paralogyIdsLoaded, fms.getFmsDataType(), bulkLoadFile.getMd5Sum(), false);
+				runCleanup(geneToGeneParalogyService, bulkLoadFileHistory, fms.getFmsDataSubType(), paralogyPairsBefore, paralogyIdsLoaded, fms.getFmsDataType(), false);
 			}
-			history.finishLoad();
+			bulkLoadFileHistory.finishLoad();
-			finalSaveHistory(history);
+			finalSaveHistory(bulkLoadFileHistory);
 		} catch (Exception e) {
-			failLoad(bulkLoadFile, e);
+			failLoad(bulkLoadFileHistory, e);
 			e.printStackTrace();
 		}
 	}
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/PhenotypeAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/PhenotypeAnnotationExecutor.java
index 772f78a46..cb7847068 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/PhenotypeAnnotationExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/PhenotypeAnnotationExecutor.java
@@ -13,7 +13,6 @@
 import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
 import org.alliancegenome.curation_api.model.entities.Molecule;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.PhenotypeFmsDTO;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.PhenotypeIngestFmsDTO;
@@ -32,35 +31,37 @@ public class PhenotypeAnnotationExecutor extends LoadFileExecutor {
 	@Inject PhenotypeAnnotationService phenotypeAnnotationService;
-	public void execLoad(BulkLoadFile bulkLoadFile) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
 		try {
-			BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFile.getBulkLoad();
+			BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
 			BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType());
-			PhenotypeIngestFmsDTO phenotypeData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())), PhenotypeIngestFmsDTO.class);
-			bulkLoadFile.setRecordCount(phenotypeData.getData().size());
-			if (bulkLoadFile.getLinkMLSchemaVersion() == null) {
+			PhenotypeIngestFmsDTO phenotypeData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), PhenotypeIngestFmsDTO.class);
+			bulkLoadFileHistory.getBulkLoadFile().setRecordCount(phenotypeData.getData().size());
+			if (bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion() == null) {
 				AGRCurationSchemaVersion version = Molecule.class.getAnnotation(AGRCurationSchemaVersion.class);
-				bulkLoadFile.setLinkMLSchemaVersion(version.max());
+				bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max());
 			}
 			if (phenotypeData.getMetaData() != null && StringUtils.isNotBlank(phenotypeData.getMetaData().getRelease())) {
-				bulkLoadFile.setAllianceMemberReleaseVersion(phenotypeData.getMetaData().getRelease());
+				bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(phenotypeData.getMetaData().getRelease());
 			}
-			bulkLoadFileDAO.merge(bulkLoadFile);
+			bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
+
+			bulkLoadFileHistory.setTotalRecords((long) phenotypeData.getData().size());
+			updateHistory(bulkLoadFileHistory);
-			BulkLoadFileHistory history = new BulkLoadFileHistory(phenotypeData.getData().size());
-			createHistory(history, bulkLoadFile);
 			Set annotationIdsLoaded = new HashSet<>();
 			List annotationIdsBefore = phenotypeAnnotationService.getAnnotationIdsByDataProvider(dataProvider);
-			runLoad(history, phenotypeData.getData(), annotationIdsLoaded, dataProvider);
+			runLoad(bulkLoadFileHistory, phenotypeData.getData(), annotationIdsLoaded, dataProvider);
-			runCleanup(phenotypeAnnotationService, history, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded.stream().collect(Collectors.toList()), "phenotype annotation", bulkLoadFile.getMd5Sum());
+			runCleanup(phenotypeAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded.stream().collect(Collectors.toList()), "phenotype annotation");
-			history.finishLoad();
-			finalSaveHistory(history);
+			bulkLoadFileHistory.finishLoad();
+			finalSaveHistory(bulkLoadFileHistory);
 		} catch (Exception e) {
+			failLoad(bulkLoadFileHistory, e);
 			e.printStackTrace();
 		}
 	}
@@ -109,10 +110,10 @@ private void loadSecondaryAnnotations(BulkLoadFileHistory history, List annotations, Set idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) {
@@ -137,9 +138,10 @@ private void loadPrimaryAnnotations(BulkLoadFileHistory history, List
 		List dtos = mapper.readValue(new GZIPInputStream(new FileInputStream(rdFile)), listType);
 		List rdNamesBefore = resourceDescriptorService.getAllNames();
 		List rdNamesAfter = new ArrayList<>();
-		BulkLoadFileHistory history = new BulkLoadFileHistory(dtos.size());
-		createHistory(history, bulkLoadFile);
-
-		dtos.forEach(dto -> {
+
+		bulkLoadFileHistory.setTotalRecords((long) dtos.size());
+
+		updateHistory(bulkLoadFileHistory);
+		for (ResourceDescriptorDTO dto : dtos) {
 			try {
 				ResourceDescriptor rd = resourceDescriptorService.upsert(dto);
-				history.incrementCompleted();
-				updateHistory(history);
+				bulkLoadFileHistory.incrementCompleted();
 				rdNamesAfter.add(rd.getName());
 			} catch (ObjectUpdateException e) {
-				addException(history, e.getData());
+				bulkLoadFileHistory.incrementFailed();
+				addException(bulkLoadFileHistory, e.getData());
 			} catch (Exception e) {
-				addException(history, new ObjectUpdateExceptionData(dto, e.getMessage(), e.getStackTrace()));
+				bulkLoadFileHistory.incrementFailed();
+				addException(bulkLoadFileHistory, new ObjectUpdateExceptionData(dto, e.getMessage(), e.getStackTrace()));
 			}
-		});
-		history.finishLoad();
-		finalSaveHistory(history);
+		}
+		updateHistory(bulkLoadFileHistory);
+
+		bulkLoadFileHistory.finishLoad();
+		finalSaveHistory(bulkLoadFileHistory);
 		resourceDescriptorService.removeNonUpdatedResourceDescriptors(rdNamesBefore, rdNamesAfter);
 		log.info("Loading ResourceDescriptorFileFinished");
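The ResourceDescriptor loop above also trades the forEach lambda for a plain for loop and makes failure accounting explicit: every DTO either bumps the completed counter, or bumps the failed counter and records its exception on the history; the history is then merged once after the loop instead of once per record. Condensed from the hunk above:

    for (ResourceDescriptorDTO dto : dtos) {
        try {
            ResourceDescriptor rd = resourceDescriptorService.upsert(dto);
            bulkLoadFileHistory.incrementCompleted();
            rdNamesAfter.add(rd.getName());
        } catch (ObjectUpdateException e) {
            bulkLoadFileHistory.incrementFailed();
            addException(bulkLoadFileHistory, e.getData());
        } catch (Exception e) {
            bulkLoadFileHistory.incrementFailed();
            addException(bulkLoadFileHistory, new ObjectUpdateExceptionData(dto, e.getMessage(), e.getStackTrace()));
        }
    }
    updateHistory(bulkLoadFileHistory); // one merge for the whole loop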
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/SequenceTargetingReagentExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/SequenceTargetingReagentExecutor.java
index fc16b5cab..14fda2aee 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/SequenceTargetingReagentExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/SequenceTargetingReagentExecutor.java
@@ -13,7 +13,6 @@
 import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
 import org.alliancegenome.curation_api.model.entities.SequenceTargetingReagent;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.SequenceTargetingReagentFmsDTO;
 import org.alliancegenome.curation_api.model.ingest.dto.fms.SequenceTargetingReagentIngestFmsDTO;
@@ -34,24 +33,20 @@ public class SequenceTargetingReagentExecutor extends LoadFileExecutor {
 	@Inject SequenceTargetingReagentGeneAssociationService sqtrGeneAssociationService;
-	public void execLoad(BulkLoadFile bulkLoadFile) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
 		try {
-			BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFile.getBulkLoad();
+			BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad();
-			SequenceTargetingReagentIngestFmsDTO sqtrIngestFmsDTO = mapper.readValue(
-				new GZIPInputStream(new FileInputStream(bulkLoadFile.getLocalFilePath())),
-				SequenceTargetingReagentIngestFmsDTO.class);
-			bulkLoadFile.setRecordCount(sqtrIngestFmsDTO.getData().size());
+			SequenceTargetingReagentIngestFmsDTO sqtrIngestFmsDTO = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), SequenceTargetingReagentIngestFmsDTO.class);
+			bulkLoadFileHistory.getBulkLoadFile().setRecordCount(sqtrIngestFmsDTO.getData().size());
-			AGRCurationSchemaVersion version = SequenceTargetingReagent.class
-				.getAnnotation(AGRCurationSchemaVersion.class);
-			bulkLoadFile.setLinkMLSchemaVersion(version.max());
+			AGRCurationSchemaVersion version = SequenceTargetingReagent.class.getAnnotation(AGRCurationSchemaVersion.class);
+			bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max());
-			if (sqtrIngestFmsDTO.getMetaData() != null
-				&& StringUtils.isNotBlank(sqtrIngestFmsDTO.getMetaData().getRelease())) {
-				bulkLoadFile.setAllianceMemberReleaseVersion(sqtrIngestFmsDTO.getMetaData().getRelease());
+			if (sqtrIngestFmsDTO.getMetaData() != null && StringUtils.isNotBlank(sqtrIngestFmsDTO.getMetaData().getRelease())) {
+				bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(sqtrIngestFmsDTO.getMetaData().getRelease());
 			}
 			BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType());
@@ -62,20 +57,21 @@ public void execLoad(BulkLoadFile bulkLoadFile) {
 			Map> previousIds = getPreviouslyLoadedIds(dataProvider);
-			bulkLoadFileDAO.merge(bulkLoadFile);
+			bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
-			BulkLoadFileHistory history = new BulkLoadFileHistory(sqtrIngestFmsDTO.getData().size() * 2);
+			bulkLoadFileHistory.setTotalRecords((long) sqtrIngestFmsDTO.getData().size() * 2);
+			updateHistory(bulkLoadFileHistory);
+
+			runLoad(bulkLoadFileHistory, dataProvider, sqtrIngestFmsDTO.getData(), idsAdded.get("SQTR"), idsAdded.get("SQTRGeneAssociation"));
-			runLoad(history, dataProvider, sqtrIngestFmsDTO.getData(), idsAdded.get("SQTR"), idsAdded.get("SQTRGeneAssociation"));
+			runCleanup(sqtrService, bulkLoadFileHistory, dataProvider.name(), previousIds.get("SQTR"), idsAdded.get("SQTR"), "SQTR");
+			runCleanup(sqtrService, bulkLoadFileHistory, dataProvider.name(), previousIds.get("SQTRGeneAssociation"), idsAdded.get("SQTRGeneAssociation"), "SQTR Gene Associations");
-			runCleanup(sqtrService, history, dataProvider.name(), previousIds.get("SQTR"), idsAdded.get("SQTR"), "SQTR", bulkLoadFile.getMd5Sum());
-			runCleanup(sqtrService, history, dataProvider.name(), previousIds.get("SQTRGeneAssociation"), idsAdded.get("SQTRGeneAssociation"), "SQTR Gene Associations", bulkLoadFile.getMd5Sum());
+			bulkLoadFileHistory.finishLoad();
-			history.finishLoad();
-
-			updateHistory(history);
+			finalSaveHistory(bulkLoadFileHistory);
 		} catch (Exception e) {
-			failLoad(bulkLoadFile, e);
+			failLoad(bulkLoadFileHistory, e);
 			e.printStackTrace();
 		}
 	}
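SequenceTargetingReagentExecutor counts each record twice against the history because one FMS record feeds two loads, the reagent itself and its gene associations, each tracked and cleaned up under its own map key. The hunk also fixes a loose end in passing: the old method ended with updateHistory(history) and never called finalSaveHistory, which it now does. The two-track bookkeeping, condensed:

    bulkLoadFileHistory.setTotalRecords((long) sqtrIngestFmsDTO.getData().size() * 2);
    updateHistory(bulkLoadFileHistory);

    runLoad(bulkLoadFileHistory, dataProvider, sqtrIngestFmsDTO.getData(),
            idsAdded.get("SQTR"), idsAdded.get("SQTRGeneAssociation"));

    runCleanup(sqtrService, bulkLoadFileHistory, dataProvider.name(),
            previousIds.get("SQTR"), idsAdded.get("SQTR"), "SQTR");
    runCleanup(sqtrService, bulkLoadFileHistory, dataProvider.name(),
            previousIds.get("SQTRGeneAssociation"), idsAdded.get("SQTRGeneAssociation"),
            "SQTR Gene Associations");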
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/VariantExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VariantExecutor.java
index f6da35942..c3cfc31ab 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/VariantExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VariantExecutor.java
@@ -5,7 +5,6 @@
 import org.alliancegenome.curation_api.dao.VariantDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.IngestDTO;
@@ -22,12 +21,12 @@ public class VariantExecutor extends LoadFileExecutor {
 	@Inject VariantDAO variantDAO;
 	@Inject VariantService variantService;
-	public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
-		BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+		BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
 		Log.info("Running with: " + manual.getDataProvider().name());
-		IngestDTO ingestDto = readIngestFile(bulkLoadFile, VariantDTO.class);
+		IngestDTO ingestDto = readIngestFile(bulkLoadFileHistory, VariantDTO.class);
 		if (ingestDto == null) {
 			return;
 		}
@@ -46,17 +45,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
 			Log.debug("runLoad: Before: total " + variantIdsBefore.size());
 		}
-		bulkLoadFile.setRecordCount(variants.size() + bulkLoadFile.getRecordCount());
-		bulkLoadFileDAO.merge(bulkLoadFile);
+		bulkLoadFileHistory.getBulkLoadFile().setRecordCount(variants.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+		bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
-		BulkLoadFileHistory history = new BulkLoadFileHistory(variants.size());
-		createHistory(history, bulkLoadFile);
-		boolean success = runLoad(variantService, history, dataProvider, variants, variantIdsLoaded);
+		bulkLoadFileHistory.setTotalRecords((long) variants.size());
+		updateHistory(bulkLoadFileHistory);
+
+		boolean success = runLoad(variantService, bulkLoadFileHistory, dataProvider, variants, variantIdsLoaded);
 		if (success && cleanUp) {
-			runCleanup(variantService, history, dataProvider.name(), variantIdsBefore, variantIdsLoaded, "variant", bulkLoadFile.getMd5Sum());
+			runCleanup(variantService, bulkLoadFileHistory, dataProvider.name(), variantIdsBefore, variantIdsLoaded, "variant");
 		}
-		history.finishLoad();
-		finalSaveHistory(history);
+		bulkLoadFileHistory.finishLoad();
+		finalSaveHistory(bulkLoadFileHistory);
 	}
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/alleleAssociations/AlleleGeneAssociationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/alleleAssociations/AlleleGeneAssociationExecutor.java
index 627597ad9..827e61c82 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/alleleAssociations/AlleleGeneAssociationExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/alleleAssociations/AlleleGeneAssociationExecutor.java
@@ -6,7 +6,6 @@
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
 import org.alliancegenome.curation_api.jobs.executors.LoadFileExecutor;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.IngestDTO;
@@ -23,13 +22,13 @@ public class AlleleGeneAssociationExecutor extends LoadFileExecutor {
 	@Inject AlleleGeneAssociationService alleleGeneAssociationService;
-	public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
-		BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+		BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
 		BackendBulkDataProvider dataProvider = manual.getDataProvider();
 		log.info("Running with dataProvider: " + dataProvider.name());
-		IngestDTO ingestDto = readIngestFile(bulkLoadFile, AlleleGeneAssociationDTO.class);
+		IngestDTO ingestDto = readIngestFile(bulkLoadFileHistory, AlleleGeneAssociationDTO.class);
 		if (ingestDto == null) {
 			return;
 		}
@@ -46,17 +45,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
 			associationIdsBefore.removeIf(Objects::isNull);
 		}
-		bulkLoadFile.setRecordCount(associations.size() + bulkLoadFile.getRecordCount());
-		bulkLoadFileDAO.merge(bulkLoadFile);
+		bulkLoadFileHistory.getBulkLoadFile().setRecordCount(associations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+		bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
-		BulkLoadFileHistory history = new BulkLoadFileHistory(associations.size());
-		createHistory(history, bulkLoadFile);
-		boolean success = runLoad(alleleGeneAssociationService, history, dataProvider, associations, associationIdsLoaded);
+		bulkLoadFileHistory.setTotalRecords((long) associations.size());
+		updateHistory(bulkLoadFileHistory);
+
+		boolean success = runLoad(alleleGeneAssociationService, bulkLoadFileHistory, dataProvider, associations, associationIdsLoaded);
 		if (success && cleanUp) {
-			runCleanup(alleleGeneAssociationService, history, dataProvider.name(), associationIdsBefore, associationIdsLoaded, "allele gene association", bulkLoadFile.getMd5Sum());
+			runCleanup(alleleGeneAssociationService, bulkLoadFileHistory, dataProvider.name(), associationIdsBefore, associationIdsLoaded, "allele gene association");
 		}
-		history.finishLoad();
-		finalSaveHistory(history);
+		bulkLoadFileHistory.finishLoad();
+		finalSaveHistory(bulkLoadFileHistory);
 	}
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/constructAssociations/ConstructGenomicEntityAssociationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/constructAssociations/ConstructGenomicEntityAssociationExecutor.java
index 6e9f0a485..4949ea763 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/constructAssociations/ConstructGenomicEntityAssociationExecutor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/associations/constructAssociations/ConstructGenomicEntityAssociationExecutor.java
@@ -7,7 +7,6 @@
 import org.alliancegenome.curation_api.dao.associations.constructAssociations.ConstructGenomicEntityAssociationDAO;
 import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
 import org.alliancegenome.curation_api.jobs.executors.LoadFileExecutor;
-import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkManualLoad;
 import org.alliancegenome.curation_api.model.ingest.dto.IngestDTO;
@@ -25,13 +24,13 @@ public class ConstructGenomicEntityAssociationExecutor extends LoadFileExecutor
 	@Inject ConstructGenomicEntityAssociationDAO constructGenomicEntityAssociationDAO;
 	@Inject ConstructGenomicEntityAssociationService constructGenomicEntityAssociationService;
-	public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
+	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) {
-		BulkManualLoad manual = (BulkManualLoad) bulkLoadFile.getBulkLoad();
+		BulkManualLoad manual = (BulkManualLoad) bulkLoadFileHistory.getBulkLoad();
 		BackendBulkDataProvider dataProvider = manual.getDataProvider();
 		log.info("Running with dataProvider: " + dataProvider.name());
-		IngestDTO ingestDto = readIngestFile(bulkLoadFile, ConstructGenomicEntityAssociationDTO.class);
+		IngestDTO ingestDto = readIngestFile(bulkLoadFileHistory, ConstructGenomicEntityAssociationDTO.class);
 		if (ingestDto == null) {
 			return;
 		}
@@ -48,17 +47,18 @@ public void execLoad(BulkLoadFile bulkLoadFile, Boolean cleanUp) {
 			associationIdsBefore.removeIf(Objects::isNull);
 		}
-		bulkLoadFile.setRecordCount(associations.size() + bulkLoadFile.getRecordCount());
-		bulkLoadFileDAO.merge(bulkLoadFile);
+		bulkLoadFileHistory.getBulkLoadFile().setRecordCount(associations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount());
+		bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile());
-		BulkLoadFileHistory history = new BulkLoadFileHistory(associations.size());
-		createHistory(history, bulkLoadFile);
-		runLoad(constructGenomicEntityAssociationService, history, dataProvider, associations, associationIdsLoaded);
+		bulkLoadFileHistory.setTotalRecords((long) associations.size());
+		updateHistory(bulkLoadFileHistory);
+
+		runLoad(constructGenomicEntityAssociationService, bulkLoadFileHistory, dataProvider, associations, associationIdsLoaded);
 		if (cleanUp) {
-			runCleanup(constructGenomicEntityAssociationService, history, dataProvider.name(), associationIdsBefore, associationIdsLoaded, "construct genomic entity association", bulkLoadFile.getMd5Sum());
+			runCleanup(constructGenomicEntityAssociationService, bulkLoadFileHistory, dataProvider.name(), associationIdsBefore, associationIdsLoaded, "construct genomic entity association");
 		}
-		history.finishLoad();
-		finalSaveHistory(history);
+		bulkLoadFileHistory.finishLoad();
+		finalSaveHistory(bulkLoadFileHistory);
 	}
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java
index d67a683b7..7319f919c 100644
--- a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java
+++ b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java
@@ -1,20 +1,22 @@
 package org.alliancegenome.curation_api.jobs.processors;
 import java.io.File;
-import java.time.OffsetDateTime;
+import java.time.LocalDateTime;
 import org.alliancegenome.curation_api.dao.loads.BulkFMSLoadDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkLoadDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkLoadFileDAO;
+import org.alliancegenome.curation_api.dao.loads.BulkLoadFileHistoryDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkManualLoadDAO;
 import org.alliancegenome.curation_api.dao.loads.BulkURLLoadDAO;
 import org.alliancegenome.curation_api.enums.BulkLoadCleanUp;
 import org.alliancegenome.curation_api.enums.JobStatus;
-import org.alliancegenome.curation_api.jobs.events.PendingBulkLoadFileJobEvent;
+import org.alliancegenome.curation_api.jobs.events.PendingLoadJobEvent;
 import org.alliancegenome.curation_api.jobs.executors.BulkLoadJobExecutor;
 import org.alliancegenome.curation_api.jobs.util.SlackNotifier;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoad;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
+import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
 import org.alliancegenome.curation_api.response.SearchResponse;
 import org.alliancegenome.curation_api.services.fms.DataFileService;
 import org.alliancegenome.curation_api.util.FileTransferHelper;
@@ -34,8 +36,11 @@ public class BulkLoadProcessor {
 	@Inject DataFileService fmsDataFileService;
 	@Inject BulkLoadDAO bulkLoadDAO;
-	@Inject BulkManualLoadDAO bulkManualLoadDAO;
 	@Inject BulkLoadFileDAO bulkLoadFileDAO;
+	@Inject BulkLoadFileHistoryDAO bulkLoadFileHistoryDAO;
+
+
+	@Inject BulkManualLoadDAO bulkManualLoadDAO;
 	@Inject BulkFMSLoadDAO bulkFMSLoadDAO;
 	@Inject BulkURLLoadDAO bulkURLLoadDAO;
@@ -43,7 +48,7 @@ public class BulkLoadProcessor {
 	@Inject SlackNotifier slackNotifier;
-	@Inject Event<PendingBulkLoadFileJobEvent> pendingFileJobEvents;
+	@Inject Event<PendingLoadJobEvent> pendingFileJobEvents;
 	protected FileTransferHelper fileHelper = new FileTransferHelper();
@@ -60,36 +65,43 @@ public class BulkLoadProcessor {
 	// return null;
 	// }
-	public void syncWithS3(BulkLoadFile bulkLoadFile) {
+	public void syncWithS3(BulkLoadFileHistory bulkLoadFileHistory) {
+		BulkLoad bulkLoad = bulkLoadFileHistory.getBulkLoad();
+		BulkLoadFile bulkLoadFile = bulkLoadFileHistory.getBulkLoadFile();
 		Log.info("Syncing with S3");
 		Log.info("Local: " + bulkLoadFile.getLocalFilePath());
 		Log.info("S3: " + bulkLoadFile.getS3Path());
-		if ((bulkLoadFile.getS3Path() != null || bulkLoadFile.generateS3MD5Path() != null) && bulkLoadFile.getLocalFilePath() == null) {
+		if ((bulkLoadFile.getS3Path() != null || bulkLoadFile.generateS3MD5Path(bulkLoad) != null) && bulkLoadFile.getLocalFilePath() == null) {
 			File outfile = fileHelper.downloadFileFromS3(s3AccessKey, s3SecretKey, s3Bucket, bulkLoadFile.getS3Path());
 			if (outfile != null) {
 				// log.info(outfile + " is of size: " + outfile.length());
 				bulkLoadFile.setFileSize(outfile.length());
 				bulkLoadFile.setLocalFilePath(outfile.getAbsolutePath());
+				bulkLoadFileDAO.merge(bulkLoadFile);
 			} else {
 				// log.error("Failed to download file from S3 Path: " + s3PathPrefix + "/" +
 				// bulkLoadFile.generateS3MD5Path());
-				bulkLoadFile.setErrorMessage("Failed to download file from S3 Path: " + s3PathPrefix + "/" + bulkLoadFile.generateS3MD5Path());
-				bulkLoadFile.setBulkloadStatus(JobStatus.FAILED);
-				slackNotifier.slackalert(bulkLoadFile);
+				bulkLoadFileHistory.setErrorMessage("Failed to download file from S3 Path: " + s3PathPrefix + "/" + bulkLoadFile.generateS3MD5Path(bulkLoad));
+				bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED);
+				slackNotifier.slackalert(bulkLoadFileHistory);
+				bulkLoadFileHistoryDAO.merge(bulkLoadFileHistory);
 			}
 			// log.info("Saving File: " + bulkLoadFile);
-			bulkLoadFileDAO.merge(bulkLoadFile);
+
 		} else if (bulkLoadFile.getS3Path() == null && bulkLoadFile.getLocalFilePath() != null) {
 			if (s3AccessKey != null && s3AccessKey.length() > 0) {
-				String s3Path = fileHelper.uploadFileToS3(s3AccessKey, s3SecretKey, s3Bucket, s3PathPrefix, bulkLoadFile.generateS3MD5Path(), new File(bulkLoadFile.getLocalFilePath()));
+				String s3Path = fileHelper.uploadFileToS3(s3AccessKey, s3SecretKey, s3Bucket, s3PathPrefix, bulkLoadFile.generateS3MD5Path(bulkLoad), new File(bulkLoadFile.getLocalFilePath()));
 				bulkLoadFile.setS3Path(s3Path);
 			}
 			bulkLoadFileDAO.merge(bulkLoadFile);
 		} else if (bulkLoadFile.getS3Path() == null && bulkLoadFile.getLocalFilePath() == null) {
-			bulkLoadFile.setErrorMessage("Failed to download or upload file with S3 Path: " + s3PathPrefix + "/" + bulkLoadFile.generateS3MD5Path() + " Local and remote file missing");
-			bulkLoadFile.setBulkloadStatus(JobStatus.FAILED);
-			slackNotifier.slackalert(bulkLoadFile);
+			bulkLoadFileHistory.setErrorMessage("Failed to download or upload file with S3 Path: " + s3PathPrefix + "/" + bulkLoadFile.generateS3MD5Path(bulkLoad) + " Local and remote file missing");
+			bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED);
+			slackNotifier.slackalert(bulkLoadFileHistory);
+			bulkLoadFileHistoryDAO.merge(bulkLoadFileHistory);
+		} else {
+			Log.info("No S3 syncing required");
 		}
 		Log.info("Syncing with S3 Finished");
 	}
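syncWithS3() now resolves one of four states, and every failure is written to the BulkLoadFileHistory and merged through its DAO instead of being stamped on the file. The branch structure, with bodies condensed to comments; the real download condition also consults bulkLoadFile.generateS3MD5Path(bulkLoad), which is simplified away here:

    if (file.getS3Path() != null && file.getLocalFilePath() == null) {
        // download from S3; a null result fails the history (error message, FAILED status, Slack alert)
    } else if (file.getS3Path() == null && file.getLocalFilePath() != null) {
        // upload when S3 credentials are configured, then store the returned S3 path on the file
    } else if (file.getS3Path() == null && file.getLocalFilePath() == null) {
        // nothing to load locally or remotely: fail the history and alert
    } else {
        Log.info("No S3 syncing required"); // the new fourth branch added by this patch
    }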
Log.info("Firing Pending Bulk File History Event: " + history.getId()); + pendingFileJobEvents.fire(new PendingLoadJobEvent(history.getId())); } protected void startLoad(BulkLoad load) { @@ -187,26 +202,27 @@ protected void endLoad(BulkLoad load, String message, JobStatus status) { Log.info("Load: " + bulkLoad.getName() + " is finished"); } - protected void startLoadFile(BulkLoadFile bulkLoadFile) { - bulkLoadFile.setBulkloadStatus(bulkLoadFile.getBulkloadStatus().getNextStatus()); - bulkLoadFileDAO.merge(bulkLoadFile); - Log.info("Load File: " + bulkLoadFile.getMd5Sum() + " is running with file: " + bulkLoadFile.getLocalFilePath()); + protected void startLoad(BulkLoadFileHistory bulkLoadFileHistory) { + bulkLoadFileHistory.setBulkloadStatus(bulkLoadFileHistory.getBulkloadStatus().getNextStatus()); + bulkLoadFileHistoryDAO.merge(bulkLoadFileHistory); + Log.info("Load File: " + bulkLoadFileHistory.getBulkLoadFile().getMd5Sum() + " is running with file: " + bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()); } - protected void endLoadFile(BulkLoadFile bulkLoadFile, String message, JobStatus status) { - if (bulkLoadFile.getLocalFilePath() != null) { - Log.info("Removing old input file: " + bulkLoadFile.getLocalFilePath()); - new File(bulkLoadFile.getLocalFilePath()).delete(); - bulkLoadFile.setLocalFilePath(null); + protected void endLoad(BulkLoadFileHistory bulkLoadFileHistory, String message, JobStatus status) { + if (bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath() != null) { + Log.info("Removing old input file: " + bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()); + new File(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()).delete(); + bulkLoadFileHistory.getBulkLoadFile().setLocalFilePath(null); + bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); } - bulkLoadFile.setErrorMessage(message); - bulkLoadFile.setBulkloadStatus(status); - bulkLoadFile.setDateLastLoaded(OffsetDateTime.now()); + bulkLoadFileHistory.setErrorMessage(message); + bulkLoadFileHistory.setBulkloadStatus(status); + bulkLoadFileHistory.setLoadFinished(LocalDateTime.now()); if (status != JobStatus.FINISHED) { - slackNotifier.slackalert(bulkLoadFile); + slackNotifier.slackalert(bulkLoadFileHistory); } - bulkLoadFileDAO.merge(bulkLoadFile); - Log.info("Load File: " + bulkLoadFile.getMd5Sum() + " is finished. Message: " + message + " Status: " + status); + bulkLoadFileHistoryDAO.merge(bulkLoadFileHistory); + Log.info("Load File: " + bulkLoadFileHistory.getBulkLoadFile().getMd5Sum() + " is finished. 
Message: " + message + " Status: " + status); } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/processors/StartLoadProcessor.java b/src/main/java/org/alliancegenome/curation_api/jobs/processors/StartLoadProcessor.java index 62b163f9b..3eb00466b 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/processors/StartLoadProcessor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/processors/StartLoadProcessor.java @@ -1,10 +1,10 @@ package org.alliancegenome.curation_api.jobs.processors; -import org.alliancegenome.curation_api.dao.loads.BulkLoadFileDAO; +import org.alliancegenome.curation_api.dao.loads.BulkLoadFileHistoryDAO; import org.alliancegenome.curation_api.enums.BulkLoadCleanUp; import org.alliancegenome.curation_api.enums.JobStatus; -import org.alliancegenome.curation_api.jobs.events.StartedBulkLoadFileJobEvent; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; +import org.alliancegenome.curation_api.jobs.events.StartedLoadJobEvent; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import io.quarkus.logging.Log; import jakarta.enterprise.context.ApplicationScoped; @@ -14,31 +14,30 @@ @ApplicationScoped public class StartLoadProcessor extends BulkLoadProcessor { - @Inject BulkLoadFileDAO bulkLoadFileDAO; + @Inject BulkLoadFileHistoryDAO bulkLoadFileHistoryDAO; - public void bulkLoadFile(@Observes StartedBulkLoadFileJobEvent event) { // An @Observes method should not be in a super class as then it gets run for - // every child class - BulkLoadFile bulkLoadFile = bulkLoadFileDAO.find(event.getId()); - if (!bulkLoadFile.getBulkloadStatus().isStarted()) { - Log.warn("bulkLoadFile: Job is not started returning: " + bulkLoadFile.getBulkloadStatus()); + public void bulkLoadFile(@Observes StartedLoadJobEvent event) { // An @Observes method should not be in a super class as then it gets run for + // every child class + BulkLoadFileHistory bulkLoadFileHistory = bulkLoadFileHistoryDAO.find(event.getId()); + + if (!bulkLoadFileHistory.getBulkloadStatus().isStarted()) { + Log.warn("bulkLoadFileHistory: Job is not started returning: " + bulkLoadFileHistory.getBulkloadStatus()); // endLoad(bulkLoadFile, "Finished ended due to status: " + // bulkLoadFile.getBulkloadStatus(), bulkLoadFile.getBulkloadStatus()); return; } else { - startLoadFile(bulkLoadFile); + startLoad(bulkLoadFileHistory); } try { - if (bulkLoadFile.getLocalFilePath() == null || bulkLoadFile.getS3Path() == null) { - syncWithS3(bulkLoadFile); - } - bulkLoadJobExecutor.process(bulkLoadFile, bulkLoadFile.getBulkloadCleanUp() == BulkLoadCleanUp.YES); - JobStatus status = bulkLoadFile.getBulkloadStatus().equals(JobStatus.FAILED) ? JobStatus.FAILED : JobStatus.FINISHED; - endLoadFile(bulkLoadFile, bulkLoadFile.getErrorMessage(), status); + syncWithS3(bulkLoadFileHistory); + bulkLoadJobExecutor.process(bulkLoadFileHistory, bulkLoadFileHistory.getBulkLoadFile().getBulkloadCleanUp() == BulkLoadCleanUp.YES); + JobStatus status = bulkLoadFileHistory.getBulkloadStatus().equals(JobStatus.FAILED) ? JobStatus.FAILED : JobStatus.FINISHED; + endLoad(bulkLoadFileHistory, bulkLoadFileHistory.getErrorMessage(), status); } catch (Exception e) { - endLoadFile(bulkLoadFile, "Failed loading: " + bulkLoadFile.getBulkLoad().getName() + " please check the logs for more info. 
" + bulkLoadFile.getErrorMessage(), JobStatus.FAILED); - Log.error("Load File: " + bulkLoadFile.getBulkLoad().getName() + " is failed"); + endLoad(bulkLoadFileHistory, "Failed loading: " + bulkLoadFileHistory.getBulkLoad().getName() + " please check the logs for more info. " + bulkLoadFileHistory.getErrorMessage(), JobStatus.FAILED); + Log.error("Load File: " + bulkLoadFileHistory.getBulkLoad().getName() + " is failed"); e.printStackTrace(); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/util/SlackNotifier.java b/src/main/java/org/alliancegenome/curation_api/jobs/util/SlackNotifier.java index fd8cbeb32..1a2099773 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/util/SlackNotifier.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/util/SlackNotifier.java @@ -7,7 +7,7 @@ import org.alliancegenome.curation_api.enums.BackendBulkLoadType; import org.alliancegenome.curation_api.enums.JobStatus; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoad; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.eclipse.microprofile.config.inject.ConfigProperty; import com.slack.api.Slack; @@ -84,24 +84,24 @@ public void slackalert(BulkLoad bulkLoad) { } } - public void slackalert(BulkLoadFile bulkLoadFile) { + public void slackalert(BulkLoadFileHistory bulkLoadFileHistory) { - if (bulkLoadFile.getBulkloadStatus() == JobStatus.FAILED) { + if (bulkLoadFileHistory.getBulkloadStatus() == JobStatus.FAILED) { List fields = new ArrayList<>(); - fields.add(new Field("Load Type", String.valueOf(bulkLoadFile.getBulkLoad().getBackendBulkLoadType()), true)); - if (bulkLoadFile.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.ONTOLOGY) { - fields.add(new Field("Ontology Type", String.valueOf(bulkLoadFile.getBulkLoad().getOntologyType()), true)); + fields.add(new Field("Load Type", String.valueOf(bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType()), true)); + if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.ONTOLOGY) { + fields.add(new Field("Ontology Type", String.valueOf(bulkLoadFileHistory.getBulkLoad().getOntologyType()), true)); } - fields.add(new Field("MD5Sum", bulkLoadFile.getMd5Sum(), true)); - fields.add(new Field("File Size", String.valueOf(bulkLoadFile.getFileSize()), true)); - if (bulkLoadFile.getLinkMLSchemaVersion() != null) { - fields.add(new Field("LinkML Version", bulkLoadFile.getLinkMLSchemaVersion(), true)); + fields.add(new Field("MD5Sum", bulkLoadFileHistory.getBulkLoadFile().getMd5Sum(), true)); + fields.add(new Field("File Size", String.valueOf(bulkLoadFileHistory.getBulkLoadFile().getFileSize()), true)); + if (bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion() != null) { + fields.add(new Field("LinkML Version", bulkLoadFileHistory.getBulkLoadFile().getLinkMLSchemaVersion(), true)); } - if (bulkLoadFile.getAllianceMemberReleaseVersion() != null) { - fields.add(new Field("Alliance Member Release Version", bulkLoadFile.getAllianceMemberReleaseVersion(), false)); + if (bulkLoadFileHistory.getBulkLoadFile().getAllianceMemberReleaseVersion() != null) { + fields.add(new Field("Alliance Member Release Version", bulkLoadFileHistory.getBulkLoadFile().getAllianceMemberReleaseVersion(), false)); } - slackalert(bulkLoadFile.getBulkLoad().getGroup().getName(), bulkLoadFile.getBulkLoad().getName(), bulkLoadFile.getErrorMessage(), fields); + 
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java
index 6e437f466..de61c14cc 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java
@@ -71,10 +71,16 @@ public abstract class BulkLoad extends AuditedObject {
 	@ManyToOne
 	private BulkLoadGroup group;
-	
+
 	@JsonView({ View.FieldsOnly.class })
-	@OneToMany(mappedBy = "bulkLoad", fetch = FetchType.EAGER)
-	@OrderBy("dateUpdated DESC")
-	private List<BulkLoadFile> loadFiles;
+	@OneToMany(mappedBy = "bulkLoadFile", fetch = FetchType.EAGER)
+	@OrderBy("loadFinished DESC")
+	private List<BulkLoadFileHistory> history;
+
+//	@JsonView({ View.FieldsOnly.class })
+//	@OneToMany(mappedBy = "bulkLoad", fetch = FetchType.EAGER)
+//	@OrderBy("dateUpdated DESC")
+//	private List<BulkLoadFile> loadFiles;
+
 }
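Taken together with the BulkLoadFileHistory changes further down, the shape this series converges on is a join-entity triangle: each history row owns a @ManyToOne to both the load and the file, and the two inverse collections point back at those fields. A schematic sketch (patch 05 below corrects the mappedBy values to match this):

@Entity
class BulkLoadFileHistorySketch extends AuditedObject {
	@ManyToOne BulkLoad bulkLoad;         // owning side of BulkLoad.history
	@ManyToOne BulkLoadFile bulkLoadFile; // owning side of BulkLoadFile.history
}
// Inverse sides, for reference:
//   BulkLoad:     @OneToMany(mappedBy = "bulkLoad")     private List<BulkLoadFileHistory> history;
//   BulkLoadFile: @OneToMany(mappedBy = "bulkLoadFile") private List<BulkLoadFileHistory> history;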
= "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer", valueBridge = @ValueBridgeRef(type = OffsetDateTimeValueBridge.class)) - @KeywordField(name = "dateLastLoaded_keyword", sortable = Sortable.YES, searchable = Searchable.YES, aggregable = Aggregable.YES, valueBridge = @ValueBridgeRef(type = OffsetDateTimeValueBridge.class)) - @JsonView(View.FieldsOnly.class) - private OffsetDateTime dateLastLoaded; - - @JsonView({ View.FieldsOnly.class }) - @Enumerated(EnumType.STRING) - private JobStatus bulkloadStatus; - @JsonView({ View.FieldsOnly.class }) @Enumerated(EnumType.STRING) private BulkLoadCleanUp bulkloadCleanUp; @@ -79,22 +58,14 @@ public class BulkLoadFile extends AuditedObject { @JsonView({ View.FieldsOnly.class }) private Integer recordCount; - @JsonView({ View.FieldsOnly.class }) - @Column(columnDefinition = "TEXT") - private String errorMessage; - @JsonView({ View.FieldsOnly.class }) private String linkMLSchemaVersion; @JsonView({ View.FieldsOnly.class }) private String allianceMemberReleaseVersion; - @ManyToOne - private BulkLoad bulkLoad; - @JsonView({ View.FieldsOnly.class }) - @OneToMany(mappedBy = "bulkLoadFile", fetch = FetchType.EAGER) - @OrderBy("loadFinished DESC") + @OneToMany(mappedBy = "bulkLoad") private List history; @Transient @@ -106,7 +77,7 @@ public String getS3Url() { @Transient @JsonIgnore @JsonView({ View.FieldsOnly.class }) - public String generateS3MD5Path() { + public String generateS3MD5Path(BulkLoad bulkLoad) { if (md5Sum != null && md5Sum.length() > 0) { return md5Sum.charAt(0) + "/" + md5Sum.charAt(1) + "/" + md5Sum.charAt(2) + "/" + md5Sum.charAt(3) + "/" + md5Sum + "." + bulkLoad.getBackendBulkLoadType().fileExtension + ".gz"; } else { diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java index 187386aa0..05de25735 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java @@ -5,6 +5,7 @@ import java.util.List; import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.enums.JobStatus; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.base.AuditedObject; import org.alliancegenome.curation_api.view.View; @@ -13,7 +14,10 @@ import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; import jakarta.persistence.Index; import jakarta.persistence.ManyToOne; import jakarta.persistence.OneToMany; @@ -42,11 +46,11 @@ public class BulkLoadFileHistory extends AuditedObject { @JsonView({ View.FieldsOnly.class }) - private LocalDateTime loadStarted; + private LocalDateTime loadStarted = LocalDateTime.now(); @JsonView({ View.FieldsOnly.class }) private LocalDateTime loadFinished; - + @JsonView({ View.FieldsOnly.class }) private Long totalRecords = 0L; @@ -68,9 +72,20 @@ public class BulkLoadFileHistory extends AuditedObject { @JsonView({ View.FieldsOnly.class }) private Double errorRate = 0.0; + @JsonView({ View.FieldsOnly.class }) + @Enumerated(EnumType.STRING) + private JobStatus bulkloadStatus; + + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private 
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java
index 187386aa0..05de25735 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFileHistory.java
@@ -5,6 +5,7 @@
 import java.util.List;
 
 import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants;
+import org.alliancegenome.curation_api.enums.JobStatus;
 import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
 import org.alliancegenome.curation_api.model.entities.base.AuditedObject;
 import org.alliancegenome.curation_api.view.View;
@@ -13,7 +14,10 @@
 import com.fasterxml.jackson.annotation.JsonView;
 
+import jakarta.persistence.Column;
 import jakarta.persistence.Entity;
+import jakarta.persistence.EnumType;
+import jakarta.persistence.Enumerated;
 import jakarta.persistence.Index;
 import jakarta.persistence.ManyToOne;
 import jakarta.persistence.OneToMany;
@@ -42,11 +46,11 @@ public class BulkLoadFileHistory extends AuditedObject {
 
 	@JsonView({ View.FieldsOnly.class })
-	private LocalDateTime loadStarted;
+	private LocalDateTime loadStarted = LocalDateTime.now();
 
 	@JsonView({ View.FieldsOnly.class })
 	private LocalDateTime loadFinished;
-	
+
 	@JsonView({ View.FieldsOnly.class })
 	private Long totalRecords = 0L;
 
@@ -68,9 +72,20 @@ public class BulkLoadFileHistory extends AuditedObject {
 	@JsonView({ View.FieldsOnly.class })
 	private Double errorRate = 0.0;
 
+	@JsonView({ View.FieldsOnly.class })
+	@Enumerated(EnumType.STRING)
+	private JobStatus bulkloadStatus;
+
+	@JsonView({ View.FieldsOnly.class })
+	@Column(columnDefinition = "TEXT")
+	private String errorMessage;
+
 	@ManyToOne
 	@OnDelete(action = OnDeleteAction.CASCADE)
 	private BulkLoadFile bulkLoadFile;
+
+	@ManyToOne
+	private BulkLoad bulkLoad;
 
 	@OneToMany(mappedBy = "bulkLoadFileHistory")
 	@JsonView(View.BulkLoadFileHistoryView.class)
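With bulkloadStatus and errorMessage now living here, a failing run is recorded entirely on its own history row. A rough sketch of that bookkeeping from a caller's point of view (Lombok-generated setters and the DAO call are assumptions, not shown in this patch):

history.setBulkloadStatus(JobStatus.FAILED);
history.setErrorMessage("GFF3 load aborted: " + cause.getMessage()); // hypothetical message
history.setLoadFinished(LocalDateTime.now());
bulkLoadFileHistoryDAO.merge(history); // persistence call name assumed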
gffEntry.setType("lncRNA"); - } - Transcript transcript = gff3DtoValidator.validateTranscriptEntry(gffEntry, attributes, dataProvider); - if (transcript != null) { - idsAdded.get("Transcript").add(transcript.getId()); - } - } - return idsAdded; - } - @Transactional - public Map> loadLocationAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { + public void loadLocationAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { Gff3DTO gffEntry = gffEntryPair.getKey(); Map attributes = gffEntryPair.getValue(); if (StringUtils.isBlank(assemblyId)) { @@ -170,12 +187,11 @@ public Map> loadLocationAssociations(BulkLoadFileHistory hist transcriptLocationService.addAssociationToSubject(transcriptLocation); } } - - return idsAdded; + } @Transactional - public Map> loadParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { + public void loadParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, Map> idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { Gff3DTO gffEntry = gffEntryPair.getKey(); Map attributes = gffEntryPair.getValue(); if (StringUtils.isBlank(assemblyId)) { @@ -227,11 +243,10 @@ public Map> loadParentChildAssociations(BulkLoadFileHistory h transcriptGeneService.addAssociationToSubjectAndObject(geneAssociation); } } - - return idsAdded; + } - public Map getIdCurieMap(List>> gffData, BackendBulkDataProvider dataProvider) { + public Map getIdCurieMap(List>> gffData) { Map geneIdCurieMap = new HashMap<>(); for (ImmutablePair> gffEntryPair : gffData) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileHistoryService.java b/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileHistoryService.java index d342c0044..f366e9de6 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileHistoryService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileHistoryService.java @@ -71,7 +71,7 @@ public Response download(Long id) { // response.header("Content-Disposition", "attachment; filename=\"" + id + "_file_exceptions.json\""); Response.ResponseBuilder response = Response.ok(jsonArray.toString()); - response.header("Content-Disposition", "attachment; filename=\"" + bulkLoadFileHistory.getBulkLoadFile().getBulkLoad().getName().replace(" ", "_") + "_exceptions.json\""); + response.header("Content-Disposition", "attachment; filename=\"" + bulkLoadFileHistory.getBulkLoad().getName().replace(" ", "_") + "_exceptions.json\""); response.type(MediaType.APPLICATION_OCTET_STREAM); return response.build(); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java b/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java index 503d85c3e..aca1fe571 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java @@ -1,12 +1,12 @@ package org.alliancegenome.curation_api.services.loads; import 
diff --git a/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java b/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java
index 503d85c3e..aca1fe571 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/loads/BulkLoadFileService.java
@@ -1,12 +1,12 @@
 package org.alliancegenome.curation_api.services.loads;
 
 import org.alliancegenome.curation_api.dao.loads.BulkLoadFileDAO;
-import org.alliancegenome.curation_api.enums.JobStatus;
-import org.alliancegenome.curation_api.jobs.events.PendingBulkLoadFileJobEvent;
+import org.alliancegenome.curation_api.jobs.events.PendingLoadJobEvent;
 import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFile;
 import org.alliancegenome.curation_api.response.ObjectResponse;
 import org.alliancegenome.curation_api.services.base.BaseEntityCrudService;
 
+import io.quarkus.logging.Log;
 import jakarta.annotation.PostConstruct;
 import jakarta.enterprise.context.RequestScoped;
 import jakarta.enterprise.event.Event;
@@ -18,7 +18,7 @@ public class BulkLoadFileService extends BaseEntityCrudService<BulkLoadFile, BulkLoadFileDAO> {
 
-	@Inject Event<PendingBulkLoadFileJobEvent> pendingFileJobEvents;
+	@Inject Event<PendingLoadJobEvent> pendingFileJobEvents;
 
 	@Override
 	@PostConstruct
@@ -29,7 +29,7 @@ protected void init() {
 	public ObjectResponse<BulkLoadFile> restartLoad(Long id) {
 		ObjectResponse<BulkLoadFile> resp = updateLoad(id);
 		if (resp != null) {
-			pendingFileJobEvents.fire(new PendingBulkLoadFileJobEvent(id));
+			pendingFileJobEvents.fire(new PendingLoadJobEvent(id));
 			return resp;
 		}
 		return null;
@@ -37,11 +37,12 @@ public ObjectResponse<BulkLoadFile> restartLoad(Long id) {
 
 	@Transactional
 	protected ObjectResponse<BulkLoadFile> updateLoad(Long id) {
-		BulkLoadFile load = bulkLoadFileDAO.find(id);
-		if (load != null && load.getBulkloadStatus().isNotRunning()) {
-			load.setBulkloadStatus(JobStatus.FORCED_PENDING);
-			return new ObjectResponse<BulkLoadFile>(load);
-		}
+		Log.error("Not Implemented anymore restarting a load happens through the history");
+		//BulkLoadFile load = bulkLoadFileDAO.find(id);
+		//if (load != null && load.getBulkloadStatus().isNotRunning()) {
+		//	load.setBulkloadStatus(JobStatus.FORCED_PENDING);
+		//	return new ObjectResponse<BulkLoadFile>(load);
+		//}
 		return null;
 	}
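Note that updateLoad now only logs and returns null, so restartLoad never reaches the fire() call; restarting a load is expected to go through the history instead. For reference, the CDI firing pattern the method retains, reduced to its essentials (the event payload shape is assumed from the constructor call in the diff):

@Inject Event<PendingLoadJobEvent> pendingFileJobEvents;

// Fire-and-forget: an @Observes method elsewhere picks this up asynchronously.
pendingFileJobEvents.fire(new PendingLoadJobEvent(id));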
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java
index 87f0ebcfa..4ccaa427c 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java
@@ -88,7 +88,7 @@ public Exon validateExonEntry(Gff3DTO dto, Map<String, String> attributes, Backe
 			exon.setName(attributes.get("Name"));
 		}
 
-		ObjectResponse<Exon> exonResponse = validateGffEntity(exon, dto, attributes, dataProvider);
+		ObjectResponse<Exon> exonResponse = validateGenomicEntity(exon, dto, attributes, dataProvider);
 
 		if (exonResponse.hasErrors()) {
 			throw new ObjectValidationException(dto, exonResponse.errorMessagesString());
@@ -115,7 +115,7 @@ public CodingSequence validateCdsEntry(Gff3DTO dto, Map<String, String> attribut
 			cds.setName(attributes.get("Name"));
 		}
 
-		ObjectResponse<CodingSequence> cdsResponse = validateGffEntity(cds, dto, attributes, dataProvider);
+		ObjectResponse<CodingSequence> cdsResponse = validateGenomicEntity(cds, dto, attributes, dataProvider);
 
 		if (cdsResponse.hasErrors()) {
 			throw new ObjectValidationException(dto, cdsResponse.errorMessagesString());
@@ -148,7 +148,7 @@ public Transcript validateTranscriptEntry(Gff3DTO dto, Map<String, String> attri
 			transcript.setName(attributes.get("Name"));
 		}
 
-		ObjectResponse<Transcript> transcriptResponse = validateGffEntity(transcript, dto, attributes, dataProvider);
+		ObjectResponse<Transcript> transcriptResponse = validateGenomicEntity(transcript, dto, attributes, dataProvider);
 
 		if (!attributes.containsKey("ID")) {
 			transcriptResponse.addErrorMessage("attributes - ID", ValidationConstants.REQUIRED_MESSAGE);
 		}
@@ -160,7 +160,7 @@ public Transcript validateTranscriptEntry(Gff3DTO dto, Map attri
 		return transcriptDAO.persist(transcriptResponse.getEntity());
 	}
 
-	private ObjectResponse validateGffEntity(E entity, Gff3DTO dto, Map<String, String> attributes, BackendBulkDataProvider dataProvider) {
+	private ObjectResponse validateGenomicEntity(E entity, Gff3DTO dto, Map<String, String> attributes, BackendBulkDataProvider dataProvider) {
 		ObjectResponse<E> geResponse = new ObjectResponse<>();
 
 		entity.setDataProvider(dataProviderService.getDefaultDataProvider(dataProvider.sourceOrganization));
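The rename reflects what the method actually does: the shared step is generic over the entity, and only the default data provider assignment is common to all three feature types. Its shape, with the generic bound reconstructed as an assumption (the raw patch text lost the type parameters):

private <E extends GenomicEntity> ObjectResponse<E> validateGenomicEntity(E entity, Gff3DTO dto,
		Map<String, String> attributes, BackendBulkDataProvider dataProvider) {
	ObjectResponse<E> geResponse = new ObjectResponse<>();
	// Common to exon, CDS, and transcript: stamp the default data provider.
	entity.setDataProvider(dataProviderService.getDefaultDataProvider(dataProvider.sourceOrganization));
	// Type-specific checks stay in the validate*Entry callers.
	return geResponse;
}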
From a01b94e93664c731c552d30346f9e20414d2428c Mon Sep 17 00:00:00 2001
From: Olin Blodgett
Date: Thu, 29 Aug 2024 12:37:19 -0600
Subject: [PATCH 05/19] Fix for linking objects together

---
 .../model/entities/bulkloads/BulkLoad.java    |  8 +----
 .../entities/bulkloads/BulkLoadFile.java      |  2 +-
 .../v0.37.0.46__remove_dataprovider_dups.sql  | 36 +++++++++++++++++++
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java
index de61c14cc..626252464 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoad.java
@@ -73,14 +73,8 @@ public abstract class BulkLoad extends AuditedObject {
 	private BulkLoadGroup group;
 
 	@JsonView({ View.FieldsOnly.class })
-	@OneToMany(mappedBy = "bulkLoadFile", fetch = FetchType.EAGER)
+	@OneToMany(mappedBy = "bulkLoad", fetch = FetchType.EAGER)
 	@OrderBy("loadFinished DESC")
 	private List<BulkLoadFileHistory> history;
-
-//	@JsonView({ View.FieldsOnly.class })
-//	@OneToMany(mappedBy = "bulkLoad", fetch = FetchType.EAGER)
-//	@OrderBy("dateUpdated DESC")
-//	private List<BulkLoadFile> loadFiles;
-
 }
diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFile.java b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFile.java
index b969a6697..2626f6732 100644
--- a/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFile.java
+++ b/src/main/java/org/alliancegenome/curation_api/model/entities/bulkloads/BulkLoadFile.java
@@ -65,7 +65,7 @@ public class BulkLoadFile extends AuditedObject {
 	private String allianceMemberReleaseVersion;
 
 	@JsonView({ View.FieldsOnly.class })
-	@OneToMany(mappedBy = "bulkLoad")
+	@OneToMany(mappedBy = "bulkLoadFile")
 	private List<BulkLoadFileHistory> history;
 
 	@Transient
diff --git a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql
index b82cdb6a4..7c7cfcd70 100644
--- a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql
+++ b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql
@@ -3,3 +3,39 @@ DELETE FROM crossreference cr USING crossreference_ids_to_delete cd WHERE cr.id
 DROP TABLE dataprovider_ids_to_keep;
 DROP TABLE crossreference_ids_to_delete;
 DROP TABLE dataprovider_ids_to_delete;
+
+-- Migration to switch bulk load file and history around
+
+ALTER TABLE bulkloadfilehistory ADD COLUMN bulkload_id bigint;
+ALTER TABLE bulkloadfilehistory ADD COLUMN errormessage text;
+ALTER TABLE bulkloadfilehistory ADD COLUMN bulkloadstatus character varying(255);
+ALTER TABLE bulkloadfilehistory ADD CONSTRAINT bulkloadfilehistory_bulkload_fk FOREIGN KEY (bulkload_id) REFERENCES bulkload(id);
+
+CREATE INDEX bulkloadfilehistory_bulkloadstatus_index ON bulkloadfilehistory USING btree (bulkloadstatus);
+CREATE INDEX bulkloadfilehistory_bulkload_index ON bulkloadfilehistory USING btree (bulkload_id);
+CREATE INDEX bulkloadfile_md5sum_index ON bulkloadfile USING btree (md5sum);
+
+UPDATE bulkloadfilehistory bh
+SET bulkload_id = bf.bulkload_id
+FROM bulkloadfile bf
+WHERE
+	bf.id = bh.bulkloadfile_id;
+
+UPDATE bulkloadfilehistory bh
+SET errormessage = bf.errormessage
+FROM bulkloadfile bf
+WHERE
+	bf.id = bh.bulkloadfile_id;
+
+UPDATE bulkloadfilehistory bh
+SET bulkloadstatus = bf.bulkloadstatus
+FROM bulkloadfile bf
+WHERE
+	bf.id = bh.bulkloadfile_id;
+
+DELETE from bulkloadfilehistory where bulkloadfile_id is null;
+
+ALTER TABLE bulkloadfile DROP COLUMN bulkload_id;
+ALTER TABLE bulkloadfile DROP COLUMN errorMessage;
+ALTER TABLE bulkloadfile DROP COLUMN bulkloadStatus;
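With both mappedBy values corrected, the inverse collections resolve as intended, and the @OrderBy makes the newest run the first element. A quick usage sketch (getter names via Lombok are assumed):

// bulkLoad.getHistory() is ordered by loadFinished DESC, so index 0 is the latest run.
List<BulkLoadFileHistory> runs = bulkLoad.getHistory();
if (!runs.isEmpty() && runs.get(0).getBulkloadStatus() == JobStatus.FAILED) {
	Log.warn(bulkLoad.getName() + " last run failed: " + runs.get(0).getErrorMessage());
}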
From f187d7348d3d63190267ed65b77d4bf5b7330802 Mon Sep 17 00:00:00 2001
From: Olin Blodgett
Date: Thu, 29 Aug 2024 19:53:39 -0600
Subject: [PATCH 06/19] Added more code for better performance

---
 .../dataLoadsPage/DataLoadsComponent.js       |  39 ++-
 .../cliapp/src/service/DataLoadService.js     |  16 +-
 .../jobs/executors/BulkLoadJobExecutor.java   |  20 +-
 .../jobs/executors/Gff3CDSExecutor.java       |  11 +-
 .../executors/Gff3CDSLocationExecutor.java    | 131 ++++++++++
 .../jobs/executors/Gff3Executor.java          | 242 ++----------------
 .../jobs/executors/Gff3ExonExecutor.java      |  15 +-
 .../executors/Gff3ExonLocationExecutor.java   | 131 ++++++++++
 .../executors/Gff3TranscriptCDSExecutor.java  | 130 ++++++++++
 .../executors/Gff3TranscriptExecutor.java     |   9 +-
 .../executors/Gff3TranscriptExonExecutor.java | 130 ++++++++++
 .../executors/Gff3TranscriptGeneExecutor.java | 129 ++++++++++
 .../Gff3TranscriptLocationExecutor.java       | 132 ++++++++++
 .../model/entities/AssemblyComponent.java     |   4 +-
 .../entities/bulkloads/BulkLoadFile.java      |   1 -
 .../bulkloads/BulkLoadFileHistory.java        |   8 +-
 .../curation_api/services/Gff3Service.java    | 107 +++++---
 ...enceGenomicLocationAssociationService.java |   3 +-
 .../websocket/IndexProcessingWebsocket.java   |   4 +-
 .../websocket/LoadProcessingWebsocket.java    |   4 +-
 .../v0.37.0.46__remove_dataprovider_dups.sql  | 115 +++++++++
 .../v0.37.0.7__remove_dataprovider_dups.sql   |   3 +
 22 files changed, 1072 insertions(+), 312 deletions(-)
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSLocationExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonLocationExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptCDSExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExonExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptGeneExecutor.java
 create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptLocationExecutor.java

diff --git a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js
index 360ec99b7..f13611187 100644
--- a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js
+++ b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js
@@ -105,8 +105,8 @@ export const DataLoadsComponent = () => {
 		if (group.loads) {
 			for (let load of group.loads) {
 				load.group = group.id;
-				if (load.loadFiles) {
-					let sortedFiles = sortFilesByDate(load.loadFiles);
+				if (load.history) {
+					let sortedFiles = sortFilesByDate(load.history);
 					if (sortedFiles[0].bulkloadStatus === 'FAILED') {
 						_errorLoads.push(load);
 					}
@@ -161,7 +161,7 @@
 	};
 
 	const urlTemplate = (rowData) => {
-		return Download;
+		return Download;
 	};
 
 	const refresh = () => {
@@ -190,9 +190,9 @@
 		setDisableFormFields(true);
 	};
 
-	const deleteLoadFile = (rowData) => {
+	const deleteLoadFileHistory = (rowData) => {
 		getService()
-			.deleteLoadFile(rowData.id)
+			.deleteLoadFileHistory(rowData.id)
 			.then((response) => {
 				queryClient.invalidateQueries(['bulkloadtable']);
 			});
 	};
@@ -306,7 +306,7 @@
 					key="delete"
 					icon="pi pi-trash"
 					className="p-button-rounded p-button-danger mr-2"
-					onClick={() => deleteLoadFileHistory(rowData)}
 				/>
 			);
 		}
@@ -354,7 +354,7 @@
 			);
 		}
 
-		if (!rowData.loadFiles || rowData.loadFiles.length === 0) {
+		if (!rowData.history || rowData.history.length === 0) {
 			ret.push(