Skip to content

Commit

Permalink
SCRUM-4190 GAF load (#1691)
Browse files Browse the repository at this point in the history
* SCRUM-4190 GAF load

* refactor to harmonize the Java model with the linkML model

* remove unused imports

* rename flyway file

* add Schemaversion annotation

* rename flyway file

* refactor

* remove unused imports

* add columns to GeneOntologyAnnotation

* add columns to GeneOntologyAnnotation

* add columns to GeneOntologyAnnotation

* add columns to GeneOntologyAnnotation table

* rename sequence

* SCRUM-4190 rename service and dao class

* cleanup runcleanup methods

* SCRUM-4190 refactoring, adding indexes to id columns.

* remove unnecessary semicolon

* SCRUM-4190 refactor according to PR review

* refactor

* remove unused import

* consolidate return lines
  • Loading branch information
cmpich authored Nov 6, 2024
1 parent af86c3f commit 0010c23
Show file tree
Hide file tree
Showing 11 changed files with 619 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/main/cliapp/src/service/DataLoadService.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ export class DataLoadService extends BaseAuthService {
'DISEASE_ANNOTATION',
'RESOURCE_DESCRIPTOR',
'EXPRESSION_ATLAS',
'GAF',
],
BulkManualLoad: [
'FULL_INGEST',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.alliancegenome.curation_api.dao;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.persistence.Query;
import org.alliancegenome.curation_api.dao.base.BaseSQLDAO;
import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation;
import org.alliancegenome.curation_api.model.entities.Organization;
import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Native-SQL data access for {@link GeneOntologyAnnotation} rows.
 *
 * <p>Both operations bypass the entity mapping and talk to the
 * {@code GeneOntologyAnnotation} table directly through the entity manager.
 */
@ApplicationScoped
public class GeneOntologyAnnotationDAO extends BaseSQLDAO<GeneOntologyAnnotation> {

	protected GeneOntologyAnnotationDAO() {
		super(GeneOntologyAnnotation.class);
	}

	/**
	 * Inserts a gene / GO term association row and stores the generated id on the passed object.
	 *
	 * <p>The id is drawn from the {@code GeneOntologyAnnotation_SEQ} sequence by the insert
	 * statement and read back with {@code currval} in a second statement.
	 *
	 * @param gaf annotation whose GO term and gene ids are written; its id is overwritten
	 * @return the same {@code gaf} instance, now carrying the generated id
	 */
	public GeneOntologyAnnotation persistGeneGoAssociation(GeneOntologyAnnotation gaf) {
		Query insertQuery = entityManager.createNativeQuery("""
			insert into GeneOntologyAnnotation (id, singlegene_id,goterm_id)
			VALUES (nextval('GeneOntologyAnnotation_SEQ'), :geneID, :goID)
			""");
		insertQuery.setParameter("goID", gaf.getGoTerm().getId());
		insertQuery.setParameter("geneID", gaf.getSingleGene().getId());
		insertQuery.executeUpdate();

		// Read back the sequence value that the insert above just consumed.
		Query currentIdQuery = entityManager.createNativeQuery("select currval('GeneOntologyAnnotation_SEQ')");
		Long generatedId = (Long) currentIdQuery.getSingleResult();
		gaf.setId(generatedId);
		return gaf;
	}

	/**
	 * Collects the stored annotations whose gene belongs to a species with the given display name.
	 *
	 * <p>The value matched against {@code species.displayname} is the organization's abbreviation.
	 *
	 * @param sourceOrganization organization whose abbreviation selects the species
	 * @return annotation id mapped to a DTO holding the gene's {@code modentityid} and the
	 *         GO term's {@code curie}
	 */
	public Map<Long, GeneOntologyAnnotationDTO> getAllGafIdsPerProvider(Organization sourceOrganization) {
		Query selectQuery = entityManager.createNativeQuery("""
			select gga.id, be.modentityid, ot.curie
			from GeneOntologyAnnotation as gga , BiologicalEntity as be, ontologyterm as ot,
			species as spec
			where gga.singlegene_id = be.id
			and be.taxon_id = spec.taxon_id
			and spec.displayname = :speciesName
			and gga.goterm_id = ot.id
			""");
		selectQuery.setParameter("speciesName", sourceOrganization.getAbbreviation());
		List<Object[]> rows = selectQuery.getResultList();

		Map<Long, GeneOntologyAnnotationDTO> dtoByAnnotationId = new HashMap<>();
		// Column order follows the select list: [0] annotation id, [1] modentityid, [2] curie.
		for (Object[] row : rows) {
			GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO();
			dto.setGeneIdentifier((String) row[1]);
			dto.setGoTermCurie((String) row[2]);
			dtoByAnnotationId.put((Long) row[0], dto);
		}
		return dtoByAnnotationId;
	}

}
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package org.alliancegenome.curation_api.dao.ontology;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.persistence.Query;
import org.alliancegenome.curation_api.dao.base.BaseSQLDAO;
import org.alliancegenome.curation_api.model.entities.ontology.GOTerm;

import jakarta.enterprise.context.ApplicationScoped;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@ApplicationScoped
public class GoTermDAO extends BaseSQLDAO<GOTerm> {
Expand All @@ -12,4 +16,19 @@ protected GoTermDAO() {
super(GOTerm.class);
}

/**
 * Loads the id and curie of every {@code ontologyterm} row whose
 * {@code ontologytermtype} is {@code GOTerm}.
 *
 * @return map from term curie to term id
 */
public Map<String, Long> getAllGOIds() {
	Query termQuery = entityManager.createNativeQuery("""
		select id, curie
		from ontologyterm
		where ontologytermtype = :type
		""");
	termQuery.setParameter("type", "GOTerm");
	List<Object[]> rows = termQuery.getResultList();

	Map<String, Long> idByCurie = new HashMap<>();
	// Column order follows the select list: [0] id, [1] curie.
	for (Object[] row : rows) {
		idByCurie.put((String) row[1], (Long) row[0]);
	}
	return idByCurie;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public enum BackendBulkLoadType {

INTERACTION_MOL("tsv"),
EXPRESSION_ATLAS("tsv"),
GAF("tsv"),
INTERACTION_GEN("tsv"),
BIOGRID_ORCS("tsv"),
PARALOGY("json"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.alliancegenome.curation_api.interfaces.crud;

import jakarta.ws.rs.Consumes;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.Produces;
import jakarta.ws.rs.core.MediaType;
import org.alliancegenome.curation_api.interfaces.base.crud.BaseCreateControllerInterface;
import org.alliancegenome.curation_api.model.entities.CrossReference;
import org.eclipse.microprofile.openapi.annotations.tags.Tag;

/**
 * JAX-RS resource interface rooted at {@code /gaf} that produces and consumes JSON.
 *
 * <p>It declares no endpoints of its own; everything exposed comes from
 * {@link BaseCreateControllerInterface}.
 *
 * <p>NOTE(review): the type argument is {@link CrossReference}, not the gene ontology
 * annotation entity this interface is named after. This looks like a copy-paste leftover;
 * confirm against the implementing controller before changing it.
 */
@Path("/gaf")
@Tag(name = "CRUD - GAF")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
public interface GeneOntologyAnnotationCrudInterface extends BaseCreateControllerInterface<CrossReference> {

}
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ public class BulkLoadJobExecutor {
@Inject VepGeneExecutor vepGeneExecutor;

@Inject ExpressionAtlasExecutor expressionAtlasExecutor;
@Inject
GeneOntologyAnnotationExecutor gafExecutor;

@Inject BiogridOrcExecutor biogridOrcExecutor;

Expand Down Expand Up @@ -148,6 +150,8 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th
vepGeneExecutor.execLoad(bulkLoadFileHistory);
} else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASAMPLE) {
htpExpressionDatasetSampleAnnotationExecutor.execLoad(bulkLoadFileHistory);
} else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GAF) {
gafExecutor.execLoad(bulkLoadFileHistory);
} else {
log.info("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented");
throw new Exception("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package org.alliancegenome.curation_api.jobs.executors;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import lombok.extern.jbosslog.JBossLog;
import org.alliancegenome.curation_api.exceptions.ObjectUpdateException;
import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation;
import org.alliancegenome.curation_api.model.entities.Organization;
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkURLLoad;
import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO;
import org.alliancegenome.curation_api.services.GeneOntologyAnnotationService;
import org.alliancegenome.curation_api.services.OrganizationService;
import org.alliancegenome.curation_api.util.ProcessDisplayHelper;
import org.apache.commons.lang3.StringUtils;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;

/**
 * Bulk-load executor for gzip-compressed, tab-separated GAF files.
 *
 * <p>The organization abbreviation is taken from the last path segment of the load URL
 * (upper-cased, text before the first dot). Only rows whose first column equals that
 * abbreviation are loaded. Every (gene, GO term) pair found is passed to
 * {@code GeneOntologyAnnotationService.insert}.
 */
@JBossLog
@ApplicationScoped
public class GeneOntologyAnnotationExecutor extends LoadFileExecutor {

	/** Zero-based index of the column compared against the organization abbreviation. */
	private static final int DB_COLUMN = 0;
	/** Zero-based index of the column holding the gene identifier without its prefix. */
	private static final int DB_OBJECT_ID_COLUMN = 1;
	/** Zero-based index of the column holding the GO term curie. */
	private static final int GO_ID_COLUMN = 4;

	@Inject
	GeneOntologyAnnotationService service;
	@Inject
	OrganizationService organizationService;

	/**
	 * Runs the GAF load for one bulk-load file.
	 *
	 * <p>Steps: resolve the organization from the URL, parse the local file, snapshot the
	 * annotation ids that exist before the load, insert one annotation per (gene, GO term)
	 * pair, then hand the before/after id lists to {@code runCleanup} and finish the history.
	 *
	 * @param bulkLoadFileHistory history record of the load; its bulk load must be a
	 *                            {@link BulkURLLoad}
	 * @throws IOException if the local file cannot be opened, is not valid gzip, or cannot be closed
	 * @throws IllegalArgumentException if a data line has too few tab-separated columns
	 */
	public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException {

		String url = ((BulkURLLoad) bulkLoadFileHistory.getBulkLoad()).getBulkloadUrl();

		// Locale.ROOT keeps the upper-casing independent of the JVM default locale
		// (a Turkish locale would otherwise turn "zfin" into "ZFİN").
		String[] tok = url.split("/");
		String fileName = tok[tok.length - 1].toUpperCase(Locale.ROOT);
		String abbr = fileName.split("\\.")[0];
		// NOTE(review): the entity is passed on unchecked; confirm what getByAbbr yields for an
		// unknown abbreviation.
		Organization organization = organizationService.getByAbbr(abbr).getEntity();

		// gene identifier (without prefix) -> GO term curies
		Map<String, List<String>> goCuriesByGene = readGoCuriesByGene(bulkLoadFileHistory, abbr);

		String name = bulkLoadFileHistory.getBulkLoad().getName();

		Map<Long, GeneOntologyAnnotationDTO> gafMap = service.getGafMap(organization);
		List<Long> gafIdsBefore = new ArrayList<>(gafMap.keySet());
		gafIdsBefore.removeIf(Objects::isNull);

		List<Long> geneGoIdsLoaded = new ArrayList<>();
		ProcessDisplayHelper ph = new ProcessDisplayHelper();
		ph.addDisplayHandler(loadProcessDisplayService);
		List<GeneOntologyAnnotationDTO> dtos = goCuriesByGene.entrySet()
			.stream()
			.flatMap(entry -> entry.getValue().stream().map(goID -> toDto(abbr + ":" + entry.getKey(), goID)))
			.toList();

		ph.startProcess(name, dtos.size());
		for (GeneOntologyAnnotationDTO dto : dtos) {
			Long geneID = service.getGeneID(dto, abbr);
			if (geneID != null) {
				GeneOntologyAnnotation newGaf = service.insert(dto, abbr).getEntity();
				if (newGaf != null) {
					geneGoIdsLoaded.add(newGaf.getId());
					bulkLoadFileHistory.incrementCompleted();
				} else {
					bulkLoadFileHistory.incrementSkipped();
				}
			} else {
				addException(bulkLoadFileHistory, new ObjectUpdateException.ObjectUpdateExceptionData(dto, "Could not find gene " + dto.getGeneIdentifier(), null));
				bulkLoadFileHistory.incrementFailed();
			}
			ph.progressProcess();
		}
		bulkLoadFileHistory.setTotalCount(dtos.size());
		runCleanup(service, bulkLoadFileHistory, abbr, gafIdsBefore, geneGoIdsLoaded, "GAF Load");
		ph.finishProcess();
		updateHistory(bulkLoadFileHistory);

		bulkLoadFileHistory.finishLoad();
		updateHistory(bulkLoadFileHistory);
		updateExceptions(bulkLoadFileHistory);
	}

	/**
	 * Parses the gzip-compressed local file and groups GO term curies by gene identifier,
	 * keeping only rows whose first column equals {@code abbr}.
	 *
	 * <p>Lines starting with {@code !} and blank lines are skipped. Failures propagate instead
	 * of being swallowed, so the caller never continues into cleanup with a partial map.
	 */
	private Map<String, List<String>> readGoCuriesByGene(BulkLoadFileHistory bulkLoadFileHistory, String abbr) throws IOException {
		Map<String, List<String>> goCuriesByGene = new HashMap<>();
		// Each stream is its own resource so the file handle is released even when the gzip
		// header is invalid. UTF-8 is explicit so decoding does not depend on the platform default.
		try (FileInputStream fileStream = new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath());
			GZIPInputStream gzipStream = new GZIPInputStream(fileStream);
			BufferedReader br = new BufferedReader(new InputStreamReader(gzipStream, java.nio.charset.StandardCharsets.UTF_8))) {
			Stream<String> lines = br.lines();
			lines.filter(line -> !line.startsWith("!") && StringUtils.isNotBlank(line)).forEach(line -> {
				String[] token = line.split("\t");
				if (token.length <= GO_ID_COLUMN) {
					throw new IllegalArgumentException("Malformed GAF line, expected at least " + (GO_ID_COLUMN + 1)
						+ " tab-separated columns but found " + token.length + ": " + line);
				}
				if (abbr.equals(token[DB_COLUMN])) {
					goCuriesByGene.computeIfAbsent(token[DB_OBJECT_ID_COLUMN], key -> new ArrayList<>()).add(token[GO_ID_COLUMN]);
				}
			});
		}
		return goCuriesByGene;
	}

	/** Builds the DTO for one (gene identifier, GO term curie) pair. */
	private static GeneOntologyAnnotationDTO toDto(String geneIdentifier, String goTermCurie) {
		GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO();
		dto.setGeneIdentifier(geneIdentifier);
		dto.setGoTermCurie(goTermCurie);
		return dto;
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.alliancegenome.curation_api.model.entities;

import com.fasterxml.jackson.annotation.JsonTypeName;
import jakarta.persistence.Entity;
import jakarta.persistence.ManyToOne;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants;
import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
import org.alliancegenome.curation_api.model.entities.base.AuditedObject;
import org.alliancegenome.curation_api.model.entities.ontology.GOTerm;
import org.eclipse.microprofile.openapi.annotations.media.Schema;

/**
 * Persistent association between one {@link Gene} and one {@link GOTerm}.
 *
 * <p>Both sides are mapped as many-to-one references. The OpenAPI schema name and description
 * identify this as a gene ontology annotation rather than reusing the gene disease
 * annotation's labels.
 */
@Entity
@Data
@EqualsAndHashCode
@Schema(name = "Gene_Ontology_Annotation", description = "Annotation class representing a gene ontology annotation")
@JsonTypeName("GeneOntologyAnnotation")
@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE)
public class GeneOntologyAnnotation extends AuditedObject {

	/** GO term the gene is annotated with. */
	@ManyToOne
	private GOTerm goTerm;
	/** Gene that carries the annotation. */
	@ManyToOne
	private Gene singleGene;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package org.alliancegenome.curation_api.model.ingest.dto;

import lombok.Data;
import lombok.EqualsAndHashCode;
import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants;
import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion;
import org.alliancegenome.curation_api.model.entities.Annotation;

/**
 * Transfer object describing one gene / GO term pairing by identifier only.
 *
 * <p>Accessors, {@code equals}, {@code hashCode} and {@code toString} are generated by Lombok.
 */
@Data
@EqualsAndHashCode(callSuper = false)
@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {Annotation.class})
public class GeneOntologyAnnotationDTO {

	// Identifier of the annotated gene.
private String geneIdentifier;

	// Curie of the GO term the gene is annotated with.
private String goTermCurie;


}
Loading

0 comments on commit 0010c23

Please sign in to comment.