From e8ef14074ec861a5eacd4d61c13fc0af1c10bdb8 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Tue, 3 Sep 2024 14:12:05 -0600 Subject: [PATCH 01/12] Remove and add back FK's --- .../db/migration/v0.37.0.24__remove_dataprovider_dups.sql | 4 ++++ .../db/migration/v0.37.0.46__remove_dataprovider_dups.sql | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql index bbf4d76f0..715462dfb 100644 --- a/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql +++ b/src/main/resources/db/migration/v0.37.0.24__remove_dataprovider_dups.sql @@ -31,3 +31,7 @@ CREATE INDEX crossreference_ids_to_delete_index ON crossreference_ids_to_delete INSERT INTO dataprovider_ids_to_delete (id) select dp.id from dataprovider dp left join dataprovider_ids_to_keep dk on dp.id = dk.id where dk.id is null; CREATE INDEX dataprovider_ids_to_delete_index ON dataprovider_ids_to_delete USING btree (id); + +ALTER TABLE ONLY phenotypeannotation DROP CONSTRAINT crossreference_id_fk; +ALTER TABLE ONLY dataprovider DROP CONSTRAINT dataprovider_crossreference_id_fk; +ALTER TABLE ONLY externaldatabaseentity DROP CONSTRAINT externaldatabaseentity_preferredcrossreference_id; diff --git a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql index 2ae4b3777..16082183c 100644 --- a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql +++ b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql @@ -4,6 +4,10 @@ DROP TABLE dataprovider_ids_to_keep; DROP TABLE crossreference_ids_to_delete; DROP TABLE dataprovider_ids_to_delete; +ALTER TABLE ONLY public.phenotypeannotation ADD CONSTRAINT crossreference_id_fk FOREIGN KEY (crossreference_id) REFERENCES public.crossreference(id); +ALTER TABLE ONLY public.dataprovider ADD CONSTRAINT dataprovider_crossreference_id_fk FOREIGN KEY (crossreference_id) REFERENCES public.crossreference(id); +ALTER TABLE ONLY public.externaldatabaseentity ADD CONSTRAINT externaldatabaseentity_preferredcrossreference_id FOREIGN KEY (preferredcrossreference_id) REFERENCES public.crossreference(id); + -- Migration to switch bulk load file and history around ALTER TABLE bulkloadfilehistory ADD COLUMN bulkload_id bigint; From 0b0bdf0299c4546eac0904121d0273c5a7a31a5d Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Tue, 3 Sep 2024 14:13:50 -0600 Subject: [PATCH 02/12] Updated constraints --- .../db/migration/v0.37.0.46__remove_dataprovider_dups.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql index 16082183c..c9eda5500 100644 --- a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql +++ b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql @@ -4,9 +4,9 @@ DROP TABLE dataprovider_ids_to_keep; DROP TABLE crossreference_ids_to_delete; DROP TABLE dataprovider_ids_to_delete; -ALTER TABLE ONLY public.phenotypeannotation ADD CONSTRAINT crossreference_id_fk FOREIGN KEY (crossreference_id) REFERENCES public.crossreference(id); -ALTER TABLE ONLY public.dataprovider ADD CONSTRAINT dataprovider_crossreference_id_fk FOREIGN KEY (crossreference_id) REFERENCES public.crossreference(id); -ALTER TABLE ONLY public.externaldatabaseentity ADD CONSTRAINT externaldatabaseentity_preferredcrossreference_id FOREIGN KEY (preferredcrossreference_id) REFERENCES public.crossreference(id); +ALTER TABLE ONLY phenotypeannotation ADD CONSTRAINT crossreference_id_fk FOREIGN KEY (crossreference_id) REFERENCES crossreference(id); +ALTER TABLE ONLY dataprovider ADD CONSTRAINT dataprovider_crossreference_id_fk FOREIGN KEY (crossreference_id) REFERENCES crossreference(id); +ALTER TABLE ONLY externaldatabaseentity ADD CONSTRAINT externaldatabaseentity_preferredcrossreference_id FOREIGN KEY (preferredcrossreference_id) REFERENCES crossreference(id); -- Migration to switch bulk load file and history around From bf823a10ce2da2467b55141bacb9e4127800270f Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Tue, 3 Sep 2024 15:46:03 -0600 Subject: [PATCH 03/12] Turned off the loads by default --- .../db/migration/v0.37.0.46__remove_dataprovider_dups.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql index c9eda5500..a6148f5f1 100644 --- a/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql +++ b/src/main/resources/db/migration/v0.37.0.46__remove_dataprovider_dups.sql @@ -143,7 +143,7 @@ INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) S INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) SELECT nextval('bulkload_seq'), 'GFF_TRANSCRIPT_CDS', 'XBXT GFF Transcript CDS Association Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) SELECT nextval('bulkload_seq'), 'GFF_TRANSCRIPT_CDS', 'ZFIN GFF Transcript CDS Association Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; -INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) SELECT id, '0 0 22 ? * SUN-THU', true FROM bulkload WHERE backendbulkloadtype in( +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) SELECT id, '0 0 22 ? * SUN-THU', false FROM bulkload WHERE backendbulkloadtype in( 'GFF_TRANSCRIPT', 'GFF_CDS', 'GFF_EXON', 'GFF_EXON_LOCATION', 'GFF_CDS_LOCATION', 'GFF_TRANSCRIPT_LOCATION', 'GFF_TRANSCRIPT_GENE', 'GFF_TRANSCRIPT_EXON', 'GFF_TRANSCRIPT_CDS' From ea89bcc0be512a6a7f70e19ee6fbe01994f6fead Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Tue, 3 Sep 2024 22:25:39 -0600 Subject: [PATCH 04/12] Split out data for smaller memory footprint --- .../crud/CodingSequenceCrudController.java | 6 +- .../controllers/crud/ExonCrudController.java | 6 +- .../crud/TranscriptCrudController.java | 10 +- .../jobs/executors/BulkLoadJobExecutor.java | 9 + .../jobs/executors/Gff3Executor.java | 74 ------ .../executors/{ => gff}/Gff3CDSExecutor.java | 55 +++-- .../{ => gff}/Gff3CDSLocationExecutor.java | 15 +- .../jobs/executors/gff/Gff3Executor.java | 40 +++ .../executors/{ => gff}/Gff3ExonExecutor.java | 54 ++-- .../{ => gff}/Gff3ExonLocationExecutor.java | 15 +- .../{ => gff}/Gff3TranscriptCDSExecutor.java | 21 +- .../{ => gff}/Gff3TranscriptExecutor.java | 25 +- .../{ => gff}/Gff3TranscriptExonExecutor.java | 23 +- .../{ => gff}/Gff3TranscriptGeneExecutor.java | 21 +- .../Gff3TranscriptLocationExecutor.java | 15 +- .../curation_api/services/Gff3Service.java | 231 ++++++++---------- .../helpers/gff3/Gff3AttributesHelper.java | 68 ++++++ .../validation/dto/Gff3DtoValidator.java | 13 +- 18 files changed, 367 insertions(+), 334 deletions(-) delete mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3CDSExecutor.java (83%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3CDSLocationExecutor.java (90%) create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3Executor.java rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3ExonExecutor.java (82%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3ExonLocationExecutor.java (90%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3TranscriptCDSExecutor.java (87%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3TranscriptExecutor.java (82%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3TranscriptExonExecutor.java (85%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3TranscriptGeneExecutor.java (86%) rename src/main/java/org/alliancegenome/curation_api/jobs/executors/{ => gff}/Gff3TranscriptLocationExecutor.java (90%) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java index 8b45a1a03..206a7878c 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CodingSequenceCrudController.java @@ -5,9 +5,9 @@ import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.CodingSequenceDAO; import org.alliancegenome.curation_api.interfaces.crud.CodingSequenceCrudInterface; -import org.alliancegenome.curation_api.jobs.executors.Gff3CDSExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3CDSLocationExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptCDSExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3CDSExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3CDSLocationExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptCDSExecutor; import org.alliancegenome.curation_api.model.entities.CodingSequence; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java index 714d743b9..be499d5a3 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/ExonCrudController.java @@ -5,9 +5,9 @@ import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.ExonDAO; import org.alliancegenome.curation_api.interfaces.crud.ExonCrudInterface; -import org.alliancegenome.curation_api.jobs.executors.Gff3ExonExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3ExonLocationExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptExonExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3ExonExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3ExonLocationExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptExonExecutor; import org.alliancegenome.curation_api.model.entities.Exon; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java index a7979b864..66f391198 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java @@ -5,11 +5,11 @@ import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.TranscriptDAO; import org.alliancegenome.curation_api.interfaces.crud.TranscriptCrudInterface; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptCDSExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptExonExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptGeneExecutor; -import org.alliancegenome.curation_api.jobs.executors.Gff3TranscriptLocationExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptCDSExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptExonExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptGeneExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptLocationExecutor; import org.alliancegenome.curation_api.model.entities.Transcript; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index f6490947b..1619fc0d0 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -19,6 +19,15 @@ import org.alliancegenome.curation_api.enums.BackendBulkLoadType; import org.alliancegenome.curation_api.jobs.executors.associations.alleleAssociations.AlleleGeneAssociationExecutor; import org.alliancegenome.curation_api.jobs.executors.associations.constructAssociations.ConstructGenomicEntityAssociationExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3CDSExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3CDSLocationExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3ExonExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3ExonLocationExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptCDSExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptExonExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptGeneExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptLocationExecutor; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import jakarta.enterprise.context.ApplicationScoped; diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java deleted file mode 100644 index 2db33f4e1..000000000 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3Executor.java +++ /dev/null @@ -1,74 +0,0 @@ -package org.alliancegenome.curation_api.jobs.executors; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; -import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; -import org.alliancegenome.curation_api.services.Gff3Service; -import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; -import org.alliancegenome.curation_api.util.ProcessDisplayHelper; -import org.apache.commons.lang3.tuple.ImmutablePair; - -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; - -@ApplicationScoped -public class Gff3Executor extends LoadFileExecutor { - - @Inject Gff3Service gff3Service; - - protected String loadGenomeAssembly(String assemblyName, BulkLoadFileHistory history, List gffHeaderData, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { - try { - assemblyName = gff3Service.loadGenomeAssembly(assemblyName, gffHeaderData, dataProvider); - history.incrementCompleted(); - } catch (ObjectUpdateException e) { - //e.printStackTrace(); - history.incrementFailed(); - addException(history, e.getData()); - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(gffHeaderData, e.getMessage(), e.getStackTrace())); - } - updateHistory(history); - ph.progressProcess(); - - return assemblyName; - } - - protected List>> preProcessGffData(List gffData, BackendBulkDataProvider dataProvider) { - List>> processedGffData = new ArrayList<>(); - - ProcessDisplayHelper ph = new ProcessDisplayHelper(); - ph.addDisplayHandler(loadProcessDisplayService); - ph.startProcess("GFF pre-processing for " + dataProvider.name(), gffData.size()); - - for (Gff3DTO originalGffEntry : gffData) { - Map attributes = Gff3AttributesHelper.getAttributes(originalGffEntry, dataProvider); - if (attributes.containsKey("Parent") && attributes.get("Parent").indexOf(",") > -1) { - for (String parent : attributes.get("Parent").split(",")) { - HashMap attributesCopy = new HashMap<>(); - attributesCopy.putAll(attributes); - String[] parentIdParts = parent.split(":"); - if (parentIdParts.length == 1) { - parent = dataProvider.name() + ':' + parentIdParts[0]; - } - attributesCopy.put("Parent", parent); - processedGffData.add(new ImmutablePair<>(originalGffEntry, attributesCopy)); - } - } else { - processedGffData.add(new ImmutablePair<>(originalGffEntry, attributes)); - } - ph.progressProcess(); - } - ph.finishProcess(); - - return processedGffData; - } -} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3CDSExecutor.java similarity index 83% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3CDSExecutor.java index faecca58d..201d47c0c 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3CDSExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -10,6 +10,7 @@ import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.CodingSequence; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; @@ -17,6 +18,8 @@ import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.CodingSequenceService; import org.alliancegenome.curation_api.services.Gff3Service; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; +import org.alliancegenome.curation_api.services.validation.dto.Gff3DtoValidator; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -33,6 +36,7 @@ public class Gff3CDSExecutor extends Gff3Executor { @Inject Gff3Service gff3Service; @Inject CodingSequenceService cdsService; + @Inject Gff3DtoValidator gff3DtoValidator; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { @@ -54,16 +58,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedCDSGffData = Gff3AttributesHelper.getCDSGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedCDSGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedCDSGffData, idsAdded, dataProvider); if (success) { runCleanup(cdsService, bulkLoadFileHistory, dataProvider.name(), cdsService.getIdsByDataProvider(dataProvider), idsAdded, "GFF coding sequence"); @@ -82,31 +86,13 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, ph.addDisplayHandler(loadProcessDisplayService); ph.startProcess("GFF CDS update for " + dataProvider.name(), gffData.size()); - loadCDSEntities(history, gffData, idsAdded, dataProvider, ph); - - ph.finishProcess(); - - return true; - } - - public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { - List idsAdded = new ArrayList<>(); - BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); - - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider); - history.finishLoad(); - - return new LoadHistoryResponce(history); - } - - - private void loadCDSEntities(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { updateHistory(history); for (ImmutablePair> gff3EntryPair : gffData) { try { - gff3Service.loadCDSEntity(history, gff3EntryPair, idsAdded, dataProvider); + CodingSequence cds = gff3DtoValidator.validateCdsEntry(gff3EntryPair.getKey(), gff3EntryPair.getValue(), dataProvider); + if (cds != null) { + idsAdded.add(cds.getId()); + } history.incrementCompleted(); } catch (ObjectUpdateException e) { history.incrementFailed(); @@ -119,7 +105,22 @@ private void loadCDSEntities(BulkLoadFileHistory history, List gffData) { + List idsAdded = new ArrayList<>(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + List>> preProcessedCDSGffData = Gff3AttributesHelper.getCDSGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedCDSGffData.size()); + + runLoad(history, null, preProcessedCDSGffData, idsAdded, dataProvider); + history.finishLoad(); + + return new LoadHistoryResponce(history); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSLocationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3CDSLocationExecutor.java similarity index 90% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSLocationExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3CDSLocationExecutor.java index 58a68dc23..83e1703df 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3CDSLocationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3CDSLocationExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -16,6 +16,7 @@ import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.associations.codingSequenceAssociations.CodingSequenceGenomicLocationAssociationService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -52,16 +53,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedCDSGffData = Gff3AttributesHelper.getCDSGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedCDSGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedCDSGffData, idsAdded, dataProvider, null); if (success) { runCleanup(cdsLocationService, bulkLoadFileHistory, dataProvider.name(), cdsLocationService.getIdsByDataProvider(dataProvider), idsAdded, "GFF coding sequence genomic location association"); @@ -98,10 +99,10 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedCDSGffData = Gff3AttributesHelper.getCDSGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedCDSGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedCDSGffData, idsAdded, dataProvider, assemblyName); history.finishLoad(); return new LoadHistoryResponce(history); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3Executor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3Executor.java new file mode 100644 index 000000000..fed9dd4a3 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3Executor.java @@ -0,0 +1,40 @@ +package org.alliancegenome.curation_api.jobs.executors.gff; + +import java.util.List; + +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; +import org.alliancegenome.curation_api.jobs.executors.LoadFileExecutor; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.services.Gff3Service; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class Gff3Executor extends LoadFileExecutor { + + @Inject Gff3Service gff3Service; + + protected String loadGenomeAssembly(String assemblyName, BulkLoadFileHistory history, List gffHeaderData, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { + try { + assemblyName = gff3Service.loadGenomeAssembly(assemblyName, gffHeaderData, dataProvider); + history.incrementCompleted(); + } catch (ObjectUpdateException e) { + //e.printStackTrace(); + history.incrementFailed(); + addException(history, e.getData()); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(gffHeaderData, e.getMessage(), e.getStackTrace())); + } + updateHistory(history); + ph.progressProcess(); + + return assemblyName; + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3ExonExecutor.java similarity index 82% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3ExonExecutor.java index 35b37b6c1..6e2e6d596 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3ExonExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -10,12 +10,15 @@ import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.Exon; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.ExonService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; +import org.alliancegenome.curation_api.services.validation.dto.Gff3DtoValidator; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -31,6 +34,7 @@ public class Gff3ExonExecutor extends Gff3Executor { @Inject ExonService exonService; + @Inject Gff3DtoValidator gff3DtoValidator; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { @@ -52,16 +56,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedExonGffData = Gff3AttributesHelper.getExonGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedExonGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedExonGffData, idsAdded, dataProvider); if (success) { runCleanup(exonService, bulkLoadFileHistory, dataProvider.name(), exonService.getIdsByDataProvider(dataProvider), idsAdded, "GFF exon"); } @@ -80,31 +84,13 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, ph.addDisplayHandler(loadProcessDisplayService); ph.startProcess("GFF Exon update for " + dataProvider.name(), gffData.size()); - loadExonEntities(history, gffData, idsAdded, dataProvider, ph); - - ph.finishProcess(); - - return true; - } - - public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { - List idsAdded = new ArrayList(); - BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); - - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider); - history.finishLoad(); - - return new LoadHistoryResponce(history); - } - - - private void loadExonEntities(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, ProcessDisplayHelper ph) { updateHistory(history); for (ImmutablePair> gff3EntryPair : gffData) { try { - gff3Service.loadExonEntity(history, gff3EntryPair, idsAdded, dataProvider); + Exon exon = gff3DtoValidator.validateExonEntry(gff3EntryPair.getKey(), gff3EntryPair.getValue(), dataProvider); + if (exon != null) { + idsAdded.add(exon.getId()); + } history.incrementCompleted(); } catch (ObjectUpdateException e) { history.incrementFailed(); @@ -118,6 +104,22 @@ private void loadExonEntities(BulkLoadFileHistory history, List gffData) { + List idsAdded = new ArrayList(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + List>> preProcessedExonGffData = Gff3AttributesHelper.getExonGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedExonGffData.size()); + + runLoad(history, null, preProcessedExonGffData, idsAdded, dataProvider); + history.finishLoad(); + + return new LoadHistoryResponce(history); + } + } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonLocationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3ExonLocationExecutor.java similarity index 90% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonLocationExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3ExonLocationExecutor.java index 0c7ee92bf..7063df784 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3ExonLocationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3ExonLocationExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -16,6 +16,7 @@ import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.associations.exonAssociations.ExonGenomicLocationAssociationService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -52,16 +53,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedExonGffData = Gff3AttributesHelper.getExonGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedExonGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedExonGffData, idsAdded, dataProvider, null); if (success) { runCleanup(exonLocationService, bulkLoadFileHistory, dataProvider.name(), exonLocationService.getIdsByDataProvider(dataProvider), idsAdded, "GFF exon genomic location association"); @@ -98,10 +99,10 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedExonGffData = Gff3AttributesHelper.getExonGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedExonGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedExonGffData, idsAdded, dataProvider, assemblyName); history.finishLoad(); return new LoadHistoryResponce(history); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptCDSExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java similarity index 87% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptCDSExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java index bdeb7a100..a91b5cdaa 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptCDSExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -16,6 +16,7 @@ import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptCodingSequenceAssociationService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -52,16 +53,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedCDSGffData = Gff3AttributesHelper.getCDSGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedCDSGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedCDSGffData, idsAdded, dataProvider, null); if (success) { runCleanup(transcriptCdsService, bulkLoadFileHistory, dataProvider.name(), transcriptCdsService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript coding sequence association"); @@ -88,7 +89,7 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, return false; } else { Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); - loadParentChildAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); + loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); } ph.finishProcess(); @@ -98,20 +99,20 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedCDSGffData = Gff3AttributesHelper.getCDSGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedCDSGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedCDSGffData, idsAdded, dataProvider, assemblyName); history.finishLoad(); return new LoadHistoryResponce(history); } - private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { + private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, Map geneIdCurieMap, ProcessDisplayHelper ph) { updateHistory(history); for (ImmutablePair> gff3EntryPair : gffData) { try { - gff3Service.loadCDSParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); + gff3Service.loadCDSParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, geneIdCurieMap); history.incrementCompleted(); } catch (ObjectUpdateException e) { history.incrementFailed(); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExecutor.java similarity index 82% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExecutor.java index 3c9f76bbd..8ef6e17c2 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -10,12 +10,15 @@ import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.Transcript; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.TranscriptService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; +import org.alliancegenome.curation_api.services.validation.dto.Gff3DtoValidator; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -31,7 +34,8 @@ public class Gff3TranscriptExecutor extends Gff3Executor { @Inject TranscriptService transcriptService; - + @Inject Gff3DtoValidator gff3DtoValidator; + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { @@ -52,16 +56,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedTranscriptGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, idsAdded, dataProvider); if (success) { runCleanup(transcriptService, bulkLoadFileHistory, dataProvider.name(), transcriptService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript"); @@ -90,10 +94,10 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedTranscriptGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider); + runLoad(history, null, preProcessedTranscriptGffData, idsAdded, dataProvider); history.finishLoad(); return new LoadHistoryResponce(history); @@ -103,7 +107,10 @@ private void loadTranscriptEntities(BulkLoadFileHistory history, List> gff3EntryPair : gffData) { try { - gff3Service.loadTranscriptEntity(history, gff3EntryPair, idsAdded, dataProvider); + Transcript transcript = gff3DtoValidator.validateTranscriptEntry(gff3EntryPair.getKey(), gff3EntryPair.getValue(), dataProvider); + if (transcript != null) { + idsAdded.add(transcript.getId()); + } history.incrementCompleted(); } catch (ObjectUpdateException e) { history.incrementFailed(); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExonExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java similarity index 85% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExonExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java index 3d9bb8ea0..40555da29 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptExonExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -16,6 +16,7 @@ import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptExonAssociationService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -52,16 +53,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedExonGffData = Gff3AttributesHelper.getExonGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedExonGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedExonGffData, idsAdded, dataProvider, null); if (success) { runCleanup(transcriptExonService, bulkLoadFileHistory, dataProvider.name(), transcriptExonService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript exon association"); @@ -81,14 +82,14 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, ph.addDisplayHandler(loadProcessDisplayService); ph.startProcess("GFF Transcript Exon update for " + dataProvider.name(), gffData.size()); - assemblyId = loadGenomeAssembly(assemblyId, history, gffHeaderData, dataProvider, ph); + assemblyId = loadGenomeAssembly(null, history, gffHeaderData, dataProvider, ph); if (assemblyId == null) { failLoad(history, new Exception("GFF Header does not contain assembly")); return false; } else { Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); - loadParentChildAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); + loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); } ph.finishProcess(); @@ -98,20 +99,20 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedExonGffData = Gff3AttributesHelper.getExonGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedExonGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedExonGffData, idsAdded, dataProvider, assemblyName); history.finishLoad(); return new LoadHistoryResponce(history); } - private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { + private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, Map geneIdCurieMap, ProcessDisplayHelper ph) { updateHistory(history); for (ImmutablePair> gff3EntryPair : gffData) { try { - gff3Service.loadExonParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); + gff3Service.loadExonParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, geneIdCurieMap); history.incrementCompleted(); } catch (ObjectUpdateException e) { history.incrementFailed(); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptGeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java similarity index 86% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptGeneExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java index 166dfd549..2fc8f7163 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptGeneExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -16,6 +16,7 @@ import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGeneAssociationService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -52,16 +53,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedTranscriptGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, idsAdded, dataProvider, null); if (success) { runCleanup(transcriptGeneService, bulkLoadFileHistory, dataProvider.name(), transcriptGeneService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript gene association"); @@ -88,7 +89,7 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, return false; } else { Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); - loadParentChildAssociations(history, gffData, idsAdded, dataProvider, assemblyId, geneIdCurieMap, ph); + loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); } ph.finishProcess(); @@ -98,20 +99,20 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedTranscriptGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedTranscriptGffData, idsAdded, dataProvider, assemblyName); history.finishLoad(); return new LoadHistoryResponce(history); } - private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap, ProcessDisplayHelper ph) { + private void loadParentChildAssociations(BulkLoadFileHistory history, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, Map geneIdCurieMap, ProcessDisplayHelper ph) { updateHistory(history); for (ImmutablePair> gff3EntryPair : gffData) { try { - gff3Service.loadGeneParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, assemblyId, geneIdCurieMap); + gff3Service.loadGeneParentChildAssociations(history, gff3EntryPair, idsAdded, dataProvider, geneIdCurieMap); history.incrementCompleted(); } catch (ObjectUpdateException e) { history.incrementFailed(); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptLocationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptLocationExecutor.java similarity index 90% rename from src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptLocationExecutor.java rename to src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptLocationExecutor.java index 456482a1b..a59131bab 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/Gff3TranscriptLocationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptLocationExecutor.java @@ -1,4 +1,4 @@ -package org.alliancegenome.curation_api.jobs.executors; +package org.alliancegenome.curation_api.jobs.executors.gff; import java.io.FileInputStream; import java.util.ArrayList; @@ -16,6 +16,7 @@ import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGenomicLocationAssociationService; +import org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -52,16 +53,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); + List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); gffData.clear(); List idsAdded = new ArrayList<>(); - bulkLoadFileHistory.setTotalRecords((long) preProcessedGffData.size()); + bulkLoadFileHistory.setTotalRecords((long) preProcessedTranscriptGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, idsAdded, dataProvider, null); if (success) { runCleanup(transcriptLocationService, bulkLoadFileHistory, dataProvider.name(), transcriptLocationService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript genomic location association"); @@ -98,10 +99,10 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { List idsAdded = new ArrayList<>(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); - List>> preProcessedGffData = preProcessGffData(gffData, dataProvider); - BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedGffData.size()); + List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedTranscriptGffData.size()); - runLoad(history, null, preProcessedGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedTranscriptGffData, idsAdded, dataProvider, assemblyName); history.finishLoad(); return new LoadHistoryResponce(history); diff --git a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java index 792468ea4..208bb51b2 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java +++ b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java @@ -60,45 +60,6 @@ public class Gff3Service { @Inject NcbiTaxonTermService ncbiTaxonTermService; @Inject Gff3DtoValidator gff3DtoValidator; - @Transactional - public void loadExonEntity(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider) throws ObjectUpdateException { - Gff3DTO gffEntry = gffEntryPair.getKey(); - Map attributes = gffEntryPair.getValue(); - if (StringUtils.equals(gffEntry.getType(), "exon") || StringUtils.equals(gffEntry.getType(), "noncoding_exon")) { - Exon exon = gff3DtoValidator.validateExonEntry(gffEntry, attributes, dataProvider); - if (exon != null) { - idsAdded.add(exon.getId()); - } - } - } - - @Transactional - public void loadCDSEntity(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider) throws ObjectUpdateException { - Gff3DTO gffEntry = gffEntryPair.getKey(); - Map attributes = gffEntryPair.getValue(); - if (StringUtils.equals(gffEntry.getType(), "CDS")) { - CodingSequence cds = gff3DtoValidator.validateCdsEntry(gffEntry, attributes, dataProvider); - if (cds != null) { - idsAdded.add(cds.getId()); - } - } - } - - @Transactional - public void loadTranscriptEntity(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider) throws ObjectUpdateException { - Gff3DTO gffEntry = gffEntryPair.getKey(); - Map attributes = gffEntryPair.getValue(); - if (Gff3Constants.TRANSCRIPT_TYPES.contains(gffEntry.getType())) { - if (StringUtils.equals(gffEntry.getType(), "lnc_RNA")) { - gffEntry.setType("lncRNA"); - } - Transcript transcript = gff3DtoValidator.validateTranscriptEntry(gffEntry, attributes, dataProvider); - if (transcript != null) { - idsAdded.add(transcript.getId()); - } - } - } - @Transactional public String loadGenomeAssembly(String assemblyName, List gffHeaderData, BackendBulkDataProvider dataProvider) throws ObjectUpdateException { @@ -134,25 +95,28 @@ public String loadGenomeAssembly(String assemblyName, List gffHeaderData @Transactional public void loadExonLocationAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { Gff3DTO gffEntry = gffEntryPair.getKey(); - Map attributes = gffEntryPair.getValue(); + if (StringUtils.isBlank(assemblyId)) { throw new ObjectValidationException(gffEntry, "Cannot load associations without assembly"); } - if (StringUtils.equals(gffEntry.getType(), "exon") || StringUtils.equals(gffEntry.getType(), "noncoding_exon")) { - String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); - SearchResponse response = exonDAO.findByField("uniqueId", uniqueId); - if (response == null || response.getSingleResult() == null) { - throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); - } - Exon exon = response.getSingleResult(); + if (!StringUtils.equals(gffEntry.getType(), "exon") && !StringUtils.equals(gffEntry.getType(), "noncoding_exon")) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for Exon Location"); + } - ExonGenomicLocationAssociation exonLocation = gff3DtoValidator.validateExonLocation(gffEntry, exon, assemblyId, dataProvider); - if (exonLocation != null) { - idsAdded.add(exonLocation.getId()); - exonLocationService.addAssociationToSubject(exonLocation); - } + String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, gffEntryPair.getValue(), dataProvider); + SearchResponse response = exonDAO.findByField("uniqueId", uniqueId); + if (response == null || response.getSingleResult() == null) { + throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); } + Exon exon = response.getSingleResult(); + + ExonGenomicLocationAssociation exonLocation = gff3DtoValidator.validateExonLocation(gffEntry, exon, assemblyId, dataProvider); + if (exonLocation != null) { + idsAdded.add(exonLocation.getId()); + exonLocationService.addAssociationToSubject(exonLocation); + } + } @Transactional @@ -162,20 +126,24 @@ public void loadCDSLocationAssociations(BulkLoadFileHistory history, ImmutablePa if (StringUtils.isBlank(assemblyId)) { throw new ObjectValidationException(gffEntry, "Cannot load associations without assembly"); } - if (StringUtils.equals(gffEntry.getType(), "CDS")) { - String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); - SearchResponse response = cdsDAO.findByField("uniqueId", uniqueId); - if (response == null || response.getSingleResult() == null) { - throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); - } - CodingSequence cds = response.getSingleResult(); - CodingSequenceGenomicLocationAssociation cdsLocation = gff3DtoValidator.validateCdsLocation(gffEntry, cds, assemblyId, dataProvider); - if (cdsLocation != null) { - idsAdded.add(cdsLocation.getId()); - cdsLocationService.addAssociationToSubject(cdsLocation); - } + if (!StringUtils.equals(gffEntry.getType(), "CDS")) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for CDS Location"); + } + + String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); + SearchResponse response = cdsDAO.findByField("uniqueId", uniqueId); + if (response == null || response.getSingleResult() == null) { + throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); + } + CodingSequence cds = response.getSingleResult(); + + CodingSequenceGenomicLocationAssociation cdsLocation = gff3DtoValidator.validateCdsLocation(gffEntry, cds, assemblyId, dataProvider); + if (cdsLocation != null) { + idsAdded.add(cdsLocation.getId()); + cdsLocationService.addAssociationToSubject(cdsLocation); } + } @Transactional @@ -186,100 +154,95 @@ public void loadTranscriptLocationAssociations(BulkLoadFileHistory history, Immu throw new ObjectValidationException(gffEntry, "Cannot load associations without assembly"); } - if (Gff3Constants.TRANSCRIPT_TYPES.contains(gffEntry.getType())) { - if (StringUtils.equals(gffEntry.getType(), "lnc_RNA")) { - gffEntry.setType("lncRNA"); - } - if (!attributes.containsKey("ID")) { - throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.REQUIRED_MESSAGE); - } - SearchResponse response = transcriptDAO.findByField("modInternalId", attributes.get("ID")); - if (response == null || response.getSingleResult() == null) { - throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("ID") + ")"); - } - Transcript transcript = response.getSingleResult(); + if (!Gff3Constants.TRANSCRIPT_TYPES.contains(gffEntry.getType())) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for Transcript Location"); + } - TranscriptGenomicLocationAssociation transcriptLocation = gff3DtoValidator.validateTranscriptLocation(gffEntry, transcript, assemblyId, dataProvider); - if (transcriptLocation != null) { - idsAdded.add(transcriptLocation.getId()); - transcriptLocationService.addAssociationToSubject(transcriptLocation); - } + if (!attributes.containsKey("ID")) { + throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.REQUIRED_MESSAGE); + } + SearchResponse response = transcriptDAO.findByField("modInternalId", attributes.get("ID")); + if (response == null || response.getSingleResult() == null) { + throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("ID") + ")"); + } + Transcript transcript = response.getSingleResult(); + + TranscriptGenomicLocationAssociation transcriptLocation = gff3DtoValidator.validateTranscriptLocation(gffEntry, transcript, assemblyId, dataProvider); + if (transcriptLocation != null) { + idsAdded.add(transcriptLocation.getId()); + transcriptLocationService.addAssociationToSubject(transcriptLocation); } } - + @Transactional - public void loadExonParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { + public void loadExonParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, Map geneIdCurieMap) throws ObjectUpdateException { Gff3DTO gffEntry = gffEntryPair.getKey(); - Map attributes = gffEntryPair.getValue(); - if (StringUtils.isBlank(assemblyId)) { - throw new ObjectValidationException(gffEntry, "Cannot load associations without assembly"); + + if (!StringUtils.equals(gffEntry.getType(), "exon") && !StringUtils.equals(gffEntry.getType(), "noncoding_exon")) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for Exon Transcript Associations"); } - if (StringUtils.equals(gffEntry.getType(), "exon") || StringUtils.equals(gffEntry.getType(), "noncoding_exon")) { - String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); - SearchResponse response = exonDAO.findByField("uniqueId", uniqueId); - if (response == null || response.getSingleResult() == null) { - throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); - } - Exon exon = response.getSingleResult(); + Map attributes = gffEntryPair.getValue(); + String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); + SearchResponse response = exonDAO.findByField("uniqueId", uniqueId); + if (response == null || response.getSingleResult() == null) { + throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); + } + Exon exon = response.getSingleResult(); - TranscriptExonAssociation transcriptAssociation = gff3DtoValidator.validateTranscriptExonAssociation(gffEntry, exon, attributes); - if (transcriptAssociation != null) { - idsAdded.add(transcriptAssociation.getId()); - transcriptExonService.addAssociationToSubjectAndObject(transcriptAssociation); - } + TranscriptExonAssociation transcriptAssociation = gff3DtoValidator.validateTranscriptExonAssociation(gffEntry, exon, attributes); + if (transcriptAssociation != null) { + idsAdded.add(transcriptAssociation.getId()); + transcriptExonService.addAssociationToSubjectAndObject(transcriptAssociation); } + } - + @Transactional - public void loadCDSParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { + public void loadCDSParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, Map geneIdCurieMap) throws ObjectUpdateException { Gff3DTO gffEntry = gffEntryPair.getKey(); Map attributes = gffEntryPair.getValue(); - if (StringUtils.isBlank(assemblyId)) { - throw new ObjectValidationException(gffEntry, "Cannot load associations without assembly"); + + if (!StringUtils.equals(gffEntry.getType(), "CDS")) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for CDS Transcript Associations"); } - if (StringUtils.equals(gffEntry.getType(), "CDS")) { - String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); - SearchResponse response = cdsDAO.findByField("uniqueId", uniqueId); - if (response == null || response.getSingleResult() == null) { - throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); - } - CodingSequence cds = response.getSingleResult(); - TranscriptCodingSequenceAssociation transcriptAssociation = gff3DtoValidator.validateTranscriptCodingSequenceAssociation(gffEntry, cds, attributes); - if (transcriptAssociation != null) { - idsAdded.add(transcriptAssociation.getId()); - transcriptCdsService.addAssociationToSubjectAndObject(transcriptAssociation); - } + String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(gffEntry, attributes, dataProvider); + SearchResponse response = cdsDAO.findByField("uniqueId", uniqueId); + if (response == null || response.getSingleResult() == null) { + throw new ObjectValidationException(gffEntry, "uniqueId - " + ValidationConstants.INVALID_MESSAGE + " (" + uniqueId + ")"); + } + CodingSequence cds = response.getSingleResult(); + + TranscriptCodingSequenceAssociation transcriptAssociation = gff3DtoValidator.validateTranscriptCodingSequenceAssociation(gffEntry, cds, attributes); + if (transcriptAssociation != null) { + idsAdded.add(transcriptAssociation.getId()); + transcriptCdsService.addAssociationToSubjectAndObject(transcriptAssociation); } } @Transactional - public void loadGeneParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId, Map geneIdCurieMap) throws ObjectUpdateException { + public void loadGeneParentChildAssociations(BulkLoadFileHistory history, ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, Map geneIdCurieMap) throws ObjectUpdateException { Gff3DTO gffEntry = gffEntryPair.getKey(); + if (!Gff3Constants.TRANSCRIPT_TYPES.contains(gffEntry.getType())) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for Gene Transcript Associations"); + } + Map attributes = gffEntryPair.getValue(); - if (StringUtils.isBlank(assemblyId)) { - throw new ObjectValidationException(gffEntry, "Cannot load associations without assembly"); + if (!attributes.containsKey("ID")) { + throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.REQUIRED_MESSAGE); } - if (Gff3Constants.TRANSCRIPT_TYPES.contains(gffEntry.getType())) { - if (StringUtils.equals(gffEntry.getType(), "lnc_RNA")) { - gffEntry.setType("lncRNA"); - } - if (!attributes.containsKey("ID")) { - throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.REQUIRED_MESSAGE); - } - SearchResponse response = transcriptDAO.findByField("modInternalId", attributes.get("ID")); - if (response == null || response.getSingleResult() == null) { - throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("ID") + ")"); - } - Transcript transcript = response.getSingleResult(); + SearchResponse response = transcriptDAO.findByField("modInternalId", attributes.get("ID")); + if (response == null || response.getSingleResult() == null) { + throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("ID") + ")"); + } + Transcript transcript = response.getSingleResult(); - TranscriptGeneAssociation geneAssociation = gff3DtoValidator.validateTranscriptGeneAssociation(gffEntry, transcript, attributes, geneIdCurieMap); - if (geneAssociation != null) { - idsAdded.add(geneAssociation.getId()); - transcriptGeneService.addAssociationToSubjectAndObject(geneAssociation); - } + TranscriptGeneAssociation geneAssociation = gff3DtoValidator.validateTranscriptGeneAssociation(gffEntry, transcript, attributes, geneIdCurieMap); + if (geneAssociation != null) { + idsAdded.add(geneAssociation.getId()); + transcriptGeneService.addAssociationToSubjectAndObject(geneAssociation); } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java index ef6eef1c3..2f2c0f51b 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java @@ -5,10 +5,13 @@ import java.util.List; import java.util.Map; +import org.alliancegenome.curation_api.constants.Gff3Constants; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; public class Gff3AttributesHelper { @@ -49,5 +52,70 @@ public static Map getAttributes(Gff3DTO dto, BackendBulkDataProv return attributes; } + + public static List>> getExonGffData(List gffData, BackendBulkDataProvider dataProvider) { + List>> retGffData = new ArrayList<>(); + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.startProcess("GFF Exon pre-processing for " + dataProvider.name(), gffData.size()); + for (Gff3DTO originalGffEntry : gffData) { + if (StringUtils.equals(originalGffEntry.getType(), "exon") || StringUtils.equals(originalGffEntry.getType(), "noncoding_exon")) { + processGffEntry(originalGffEntry, retGffData, dataProvider); + } + ph.progressProcess(); + } + ph.finishProcess(); + return retGffData; + } + + public static List>> getCDSGffData(List gffData, BackendBulkDataProvider dataProvider) { + List>> retGffData = new ArrayList<>(); + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.startProcess("GFF CDS pre-processing for " + dataProvider.name(), gffData.size()); + for (Gff3DTO originalGffEntry : gffData) { + if (StringUtils.equals(originalGffEntry.getType(), "CDS")) { + processGffEntry(originalGffEntry, retGffData, dataProvider); + } + ph.progressProcess(); + } + ph.finishProcess(); + return retGffData; + } + + + public static List>> getTranscriptGffData(List gffData, BackendBulkDataProvider dataProvider) { + List>> retGffData = new ArrayList<>(); + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.startProcess("GFF Transcript pre-processing for " + dataProvider.name(), gffData.size()); + for (Gff3DTO originalGffEntry : gffData) { + if (StringUtils.equals(originalGffEntry.getType(), "lnc_RNA")) { + originalGffEntry.setType("lncRNA"); + } + if (Gff3Constants.TRANSCRIPT_TYPES.contains(originalGffEntry.getType())) { + processGffEntry(originalGffEntry, retGffData, dataProvider); + } + ph.progressProcess(); + } + ph.finishProcess(); + return retGffData; + } + + private static void processGffEntry(Gff3DTO originalGffEntry, List>> retGffData, BackendBulkDataProvider dataProvider) { + Map attributes = getAttributes(originalGffEntry, dataProvider); + if (attributes.containsKey("Parent") && attributes.get("Parent").indexOf(",") > -1) { + for (String parent : attributes.get("Parent").split(",")) { + HashMap attributesCopy = new HashMap<>(); + attributesCopy.putAll(attributes); + String[] parentIdParts = parent.split(":"); + if (parentIdParts.length == 1) { + parent = dataProvider.name() + ':' + parentIdParts[0]; + } + attributesCopy.put("Parent", parent); + retGffData.add(new ImmutablePair<>(originalGffEntry, attributesCopy)); + } + } else { + retGffData.add(new ImmutablePair<>(originalGffEntry, attributes)); + } + + } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java index 4ccaa427c..8659ab3fa 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java @@ -74,6 +74,10 @@ public class Gff3DtoValidator { public Exon validateExonEntry(Gff3DTO dto, Map attributes, BackendBulkDataProvider dataProvider) throws ObjectValidationException { Exon exon = null; + + if (!StringUtils.equals(dto.getType(), "exon") && !StringUtils.equals(dto.getType(), "noncoding_exon")) { + throw new ObjectValidationException(dto, "Invalid Type: " + dto.getType() + " for Exon Entity"); + } String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(dto, attributes, dataProvider); SearchResponse searchResponse = exonDAO.findByField("uniqueId", uniqueId); @@ -102,6 +106,10 @@ public CodingSequence validateCdsEntry(Gff3DTO dto, Map attribut CodingSequence cds = null; + if (!StringUtils.equals(dto.getType(), "CDS")) { + throw new ObjectValidationException(dto, "Invalid Type: " + dto.getType() + " for CDS Entity"); + } + String uniqueId = Gff3UniqueIdHelper.getExonOrCodingSequenceUniqueId(dto, attributes, dataProvider); SearchResponse searchResponse = codingSequenceDAO.findByField("uniqueId", uniqueId); if (searchResponse != null && searchResponse.getSingleResult() != null) { @@ -127,8 +135,11 @@ public CodingSequence validateCdsEntry(Gff3DTO dto, Map attribut @Transactional public Transcript validateTranscriptEntry(Gff3DTO dto, Map attributes, BackendBulkDataProvider dataProvider) throws ObjectValidationException { + if (!Gff3Constants.TRANSCRIPT_TYPES.contains(dto.getType())) { + throw new ObjectValidationException(dto, "Invalid Type: " + dto.getType() + " for Transcript Entity"); + } + Transcript transcript = null; - if (attributes.containsKey("ID")) { SearchResponse searchResponse = transcriptDAO.findByField("modInternalId", attributes.get("ID")); if (searchResponse != null && searchResponse.getSingleResult() != null) { From bcca90c49d49d2563e36f1cdef231c111b76d383 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Tue, 3 Sep 2024 22:46:39 -0600 Subject: [PATCH 05/12] Removed assembly id as its not needed for the associations --- .../gff/Gff3TranscriptCDSExecutor.java | 12 +++--------- .../gff/Gff3TranscriptExonExecutor.java | 12 +++--------- .../gff/Gff3TranscriptGeneExecutor.java | 18 ++++++------------ 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java index a91b5cdaa..4fd75a255 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptCDSExecutor.java @@ -82,15 +82,9 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, ph.addDisplayHandler(loadProcessDisplayService); ph.startProcess("GFF Transcript CDS update for " + dataProvider.name(), gffData.size()); - assemblyId = loadGenomeAssembly(assemblyId, history, gffHeaderData, dataProvider, ph); - - if (assemblyId == null) { - failLoad(history, new Exception("GFF Header does not contain assembly")); - return false; - } else { - Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); - loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); - } + Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); + loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); + ph.finishProcess(); return true; diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java index 40555da29..0162208a2 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptExonExecutor.java @@ -82,15 +82,9 @@ private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, ph.addDisplayHandler(loadProcessDisplayService); ph.startProcess("GFF Transcript Exon update for " + dataProvider.name(), gffData.size()); - assemblyId = loadGenomeAssembly(null, history, gffHeaderData, dataProvider, ph); - - if (assemblyId == null) { - failLoad(history, new Exception("GFF Header does not contain assembly")); - return false; - } else { - Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); - loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); - } + Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); + loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); + ph.finishProcess(); return true; diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java index 2fc8f7163..4e58fd818 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3TranscriptGeneExecutor.java @@ -62,7 +62,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setTotalRecords((long) preProcessedTranscriptGffData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, idsAdded, dataProvider, null); + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedTranscriptGffData, idsAdded, dataProvider); if (success) { runCleanup(transcriptGeneService, bulkLoadFileHistory, dataProvider.name(), transcriptGeneService.getIdsByDataProvider(dataProvider), idsAdded, "GFF transcript gene association"); @@ -76,21 +76,15 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { } } - private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) { + private boolean runLoad(BulkLoadFileHistory history, List gffHeaderData, List>> gffData, List idsAdded, BackendBulkDataProvider dataProvider) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); ph.addDisplayHandler(loadProcessDisplayService); ph.startProcess("GFF Transcript Gene update for " + dataProvider.name(), gffData.size()); - assemblyId = loadGenomeAssembly(assemblyId, history, gffHeaderData, dataProvider, ph); - - if (assemblyId == null) { - failLoad(history, new Exception("GFF Header does not contain assembly")); - return false; - } else { - Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); - loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); - } + Map geneIdCurieMap = gff3Service.getIdCurieMap(gffData); + loadParentChildAssociations(history, gffData, idsAdded, dataProvider, geneIdCurieMap, ph); + ph.finishProcess(); return true; @@ -102,7 +96,7 @@ public APIResponse runLoadApi(String dataProviderName, String assemblyName, List List>> preProcessedTranscriptGffData = Gff3AttributesHelper.getTranscriptGffData(gffData, dataProvider); BulkLoadFileHistory history = new BulkLoadFileHistory(preProcessedTranscriptGffData.size()); - runLoad(history, null, preProcessedTranscriptGffData, idsAdded, dataProvider, assemblyName); + runLoad(history, null, preProcessedTranscriptGffData, idsAdded, dataProvider); history.finishLoad(); return new LoadHistoryResponce(history); From 684e69b607786b05d3571f1f8e2a72f5d95005c8 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Tue, 3 Sep 2024 23:24:32 -0600 Subject: [PATCH 06/12] Fixed IT's --- .../controllers/crud/TranscriptCrudController.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java index 66f391198..b225267fe 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java @@ -54,14 +54,6 @@ public APIResponse updateTranscripts(String dataProvider, String assembly, List< history.setFailedRecords(history.getFailedRecords() + resp.getHistory().getFailedRecords()); history.setCompletedRecords(history.getCompletedRecords() + resp.getHistory().getCompletedRecords()); history.setTotalRecords(history.getTotalRecords() + resp.getHistory().getTotalRecords()); - resp = (LoadHistoryResponce) gff3TranscriptCDSExecutor.runLoadApi(dataProvider, assembly, gffData); - history.setFailedRecords(history.getFailedRecords() + resp.getHistory().getFailedRecords()); - history.setCompletedRecords(history.getCompletedRecords() + resp.getHistory().getCompletedRecords()); - history.setTotalRecords(history.getTotalRecords() + resp.getHistory().getTotalRecords()); - resp = (LoadHistoryResponce) gff3TranscriptExonExecutor.runLoadApi(dataProvider, assembly, gffData); - history.setFailedRecords(history.getFailedRecords() + resp.getHistory().getFailedRecords()); - history.setCompletedRecords(history.getCompletedRecords() + resp.getHistory().getCompletedRecords()); - history.setTotalRecords(history.getTotalRecords() + resp.getHistory().getTotalRecords()); return new LoadHistoryResponce(history); } From 032e375ca47c0a1f15ca8d7173aba5b5b1ef66fc Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Wed, 4 Sep 2024 06:35:35 -0600 Subject: [PATCH 07/12] Fix IT's --- .../curation_api/Gff3BulkUploadITCase.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index ae74b8687..b352a5de9 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -60,7 +60,7 @@ private void loadRequiredEntities() throws Exception { public void gff3DataBulkUploadTranscriptEntity() throws Exception { loadRequiredEntities(); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "GFF_01_transcript.json", 5, 0, 9); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "GFF_01_transcript.json", 3, 0, 9); RestAssured.given(). when(). @@ -89,7 +89,7 @@ public void gff3DataBulkUploadTranscriptEntity() throws Exception { @Test @Order(2) public void gff3DataBulkUploadExonEntity() throws Exception { - checkFailedBulkLoad(exonBulkPostEndpoint, gffDataTestFilePath + "GFF_02_exon.json", 3, 0, 5); + checkFailedBulkLoad(exonBulkPostEndpoint, gffDataTestFilePath + "GFF_02_exon.json", 3, 0, 4); RestAssured.given(). when(). @@ -120,7 +120,7 @@ public void gff3DataBulkUploadExonEntity() throws Exception { @Test @Order(3) public void gff3DataBulkUploadCodingSequenceEntity() throws Exception { - checkFailedBulkLoad(cdsBulkPostEndpoint, gffDataTestFilePath + "GFF_03_CDS.json", 3, 0, 5); + checkFailedBulkLoad(cdsBulkPostEndpoint, gffDataTestFilePath + "GFF_03_CDS.json", 3, 0, 4); RestAssured.given(). when(). @@ -153,7 +153,7 @@ public void gff3DataBulkUploadCodingSequenceEntity() throws Exception { @Test @Order(4) public void gff3DataBulkUploadUpdateTranscriptEntity() throws Exception { - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "UD_01_update_transcript.json", 5, 0, 9); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "UD_01_update_transcript.json", 3, 0, 9); RestAssured.given(). when(). @@ -179,7 +179,7 @@ public void gff3DataBulkUploadUpdateTranscriptEntity() throws Exception { @Test @Order(5) public void gff3DataBulkUploadMissingRequiredFields() throws Exception { - int total = 5; + int total = 3; int failed = 1; int completed = 8; checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_01_no_seq_id.json", total, failed, completed); @@ -194,7 +194,7 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { @Test @Order(6) public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { - int total = 5; + int total = 3; int failed = 1; int completed = 8; checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_01_empty_seq_id.json", total, failed, completed); @@ -207,7 +207,7 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { @Test @Order(7) public void gff3DataBulkUploadInvalidFields() throws Exception { - int total = 5; + int total = 3; int failed = 1; int completed = 8; checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_01_invalid_strand.json", total, failed, completed); From 73f1cd3630b82420f6f46fd0d73fe51b32fb157e Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Wed, 4 Sep 2024 06:59:34 -0600 Subject: [PATCH 08/12] Fix IT's --- .../curation_api/Gff3BulkUploadITCase.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index b352a5de9..763f42fde 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -60,7 +60,7 @@ private void loadRequiredEntities() throws Exception { public void gff3DataBulkUploadTranscriptEntity() throws Exception { loadRequiredEntities(); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "GFF_01_transcript.json", 3, 0, 9); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "GFF_01_transcript.json", 3, 0, 4); RestAssured.given(). when(). @@ -153,7 +153,7 @@ public void gff3DataBulkUploadCodingSequenceEntity() throws Exception { @Test @Order(4) public void gff3DataBulkUploadUpdateTranscriptEntity() throws Exception { - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "UD_01_update_transcript.json", 3, 0, 9); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "UD_01_update_transcript.json", 3, 0, 4); RestAssured.given(). when(). @@ -181,7 +181,7 @@ public void gff3DataBulkUploadUpdateTranscriptEntity() throws Exception { public void gff3DataBulkUploadMissingRequiredFields() throws Exception { int total = 3; int failed = 1; - int completed = 8; + int completed = 3; checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_01_no_seq_id.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_02_no_start.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_03_no_end.json", total, failed, completed); @@ -196,7 +196,7 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { int total = 3; int failed = 1; - int completed = 8; + int completed = 3; checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_01_empty_seq_id.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_02_empty_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_03_empty_transcript_parent.json", total, failed, completed); @@ -209,7 +209,7 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { public void gff3DataBulkUploadInvalidFields() throws Exception { int total = 3; int failed = 1; - int completed = 8; + int completed = 3; checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_01_invalid_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_02_invalid_phase.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_03_invalid_transcript_parent.json", total, failed, completed); From ac033aedb8522e506fc05e8b411e55625d6df44d Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Wed, 4 Sep 2024 07:08:08 -0600 Subject: [PATCH 09/12] Fix transcript endpoint --- .../controllers/crud/TranscriptCrudController.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java index b225267fe..66f391198 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/TranscriptCrudController.java @@ -54,6 +54,14 @@ public APIResponse updateTranscripts(String dataProvider, String assembly, List< history.setFailedRecords(history.getFailedRecords() + resp.getHistory().getFailedRecords()); history.setCompletedRecords(history.getCompletedRecords() + resp.getHistory().getCompletedRecords()); history.setTotalRecords(history.getTotalRecords() + resp.getHistory().getTotalRecords()); + resp = (LoadHistoryResponce) gff3TranscriptCDSExecutor.runLoadApi(dataProvider, assembly, gffData); + history.setFailedRecords(history.getFailedRecords() + resp.getHistory().getFailedRecords()); + history.setCompletedRecords(history.getCompletedRecords() + resp.getHistory().getCompletedRecords()); + history.setTotalRecords(history.getTotalRecords() + resp.getHistory().getTotalRecords()); + resp = (LoadHistoryResponce) gff3TranscriptExonExecutor.runLoadApi(dataProvider, assembly, gffData); + history.setFailedRecords(history.getFailedRecords() + resp.getHistory().getFailedRecords()); + history.setCompletedRecords(history.getCompletedRecords() + resp.getHistory().getCompletedRecords()); + history.setTotalRecords(history.getTotalRecords() + resp.getHistory().getTotalRecords()); return new LoadHistoryResponce(history); } From 9c931a47fcad8eff3d4d68d27f012c157ff673fe Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Wed, 4 Sep 2024 07:28:27 -0600 Subject: [PATCH 10/12] Fix IT's --- .../alliancegenome/curation_api/Gff3BulkUploadITCase.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 763f42fde..a586b8056 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -187,7 +187,7 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_03_no_end.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_04_no_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_05_no_transcript_parent.json", total, failed, completed); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", total, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", 1, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_07_no_cds_parent.json", total, failed, completed); } @@ -200,7 +200,7 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_01_empty_seq_id.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_02_empty_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_03_empty_transcript_parent.json", total, failed, completed); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", total, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", 1, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_05_empty_cds_parent.json", total, failed, completed); } @@ -213,7 +213,7 @@ public void gff3DataBulkUploadInvalidFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_01_invalid_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_02_invalid_phase.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_03_invalid_transcript_parent.json", total, failed, completed); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_04_invalid_exon_parent.json", total, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_04_invalid_exon_parent.json", 1, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_05_invalid_cds_parent.json", total, failed, completed); } From e2d1ec69ac59baa8519a42f44941e73a80fe2c9a Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Wed, 4 Sep 2024 07:42:34 -0600 Subject: [PATCH 11/12] Fix IT's --- .../alliancegenome/curation_api/Gff3BulkUploadITCase.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index a586b8056..2268e4d18 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -187,7 +187,7 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_03_no_end.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_04_no_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_05_no_transcript_parent.json", total, failed, completed); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", 1, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", 1, failed, 1); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_07_no_cds_parent.json", total, failed, completed); } @@ -200,7 +200,7 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_01_empty_seq_id.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_02_empty_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_03_empty_transcript_parent.json", total, failed, completed); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", 1, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", 1, failed, 1); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_05_empty_cds_parent.json", total, failed, completed); } @@ -213,7 +213,7 @@ public void gff3DataBulkUploadInvalidFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_01_invalid_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_02_invalid_phase.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_03_invalid_transcript_parent.json", total, failed, completed); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_04_invalid_exon_parent.json", 1, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_04_invalid_exon_parent.json", 1, failed, 1); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_05_invalid_cds_parent.json", total, failed, completed); } From dea102d8c92a0c9ca3ff0c03bdd7827b57f89d66 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Wed, 4 Sep 2024 08:08:29 -0600 Subject: [PATCH 12/12] Fix IT's --- .../alliancegenome/curation_api/Gff3BulkUploadITCase.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 2268e4d18..7d9e8fc0b 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -188,7 +188,7 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_04_no_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_05_no_transcript_parent.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", 1, failed, 1); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_07_no_cds_parent.json", total, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_07_no_cds_parent.json", 1, failed, 1); } @Test @@ -201,7 +201,7 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_02_empty_strand.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_03_empty_transcript_parent.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", 1, failed, 1); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_05_empty_cds_parent.json", total, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_05_empty_cds_parent.json", 1, failed, 1); } @Test @@ -214,7 +214,7 @@ public void gff3DataBulkUploadInvalidFields() throws Exception { checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_02_invalid_phase.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_03_invalid_transcript_parent.json", total, failed, completed); checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_04_invalid_exon_parent.json", 1, failed, 1); - checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_05_invalid_cds_parent.json", total, failed, completed); + checkFailedBulkLoad(transcriptBulkPostEndpoint, gffDataTestFilePath + "IV_05_invalid_cds_parent.json", 1, failed, 1); } }