-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* SCRUM-4190 GAF load * refactor to harmonize the Java model with the linkML model * remove unused imports * rename flyway file * add Schemaversion annotation * rename flyway file * refactor * remove unused imports * add columns to GeneOntologyAnnotation * add columns to GeneOntologyAnnotation * add columns to GeneOntologyAnnotation * add columns to GeneOntologyAnnotation table * rename sequence * SCRUM-4190 rename service and dao class * cleanup runcleanup methods * SCRUM-4190 refactoring, adding indexes to id columns. * remove unnecessary semicolon * SCRUM-4190 refactor according to PR review * refactor * remove unused import * consolidate return lines
- Loading branch information
Showing
11 changed files
with
619 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
src/main/java/org/alliancegenome/curation_api/dao/GeneOntologyAnnotationDAO.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package org.alliancegenome.curation_api.dao; | ||
|
||
import jakarta.enterprise.context.ApplicationScoped; | ||
import jakarta.persistence.Query; | ||
import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; | ||
import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation; | ||
import org.alliancegenome.curation_api.model.entities.Organization; | ||
import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO; | ||
|
||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
@ApplicationScoped | ||
public class GeneOntologyAnnotationDAO extends BaseSQLDAO<GeneOntologyAnnotation> { | ||
|
||
protected GeneOntologyAnnotationDAO() { | ||
super(GeneOntologyAnnotation.class); | ||
} | ||
|
||
public GeneOntologyAnnotation persistGeneGoAssociation(GeneOntologyAnnotation gaf) { | ||
String sql = """ | ||
insert into GeneOntologyAnnotation (id, singlegene_id,goterm_id) | ||
VALUES (nextval('GeneOntologyAnnotation_SEQ'), :geneID, :goID) | ||
"""; | ||
Query query = entityManager.createNativeQuery(sql); | ||
query.setParameter("goID", gaf.getGoTerm().getId()); | ||
query.setParameter("geneID", gaf.getSingleGene().getId()); | ||
query.executeUpdate(); | ||
|
||
sql = "select currval('GeneOntologyAnnotation_SEQ')"; | ||
Object object = entityManager.createNativeQuery(sql).getSingleResult(); | ||
gaf.setId((Long) object); | ||
return gaf; | ||
} | ||
|
||
public Map<Long, GeneOntologyAnnotationDTO> getAllGafIdsPerProvider(Organization sourceOrganization) { | ||
Query query = entityManager.createNativeQuery(""" | ||
select gga.id, be.modentityid, ot.curie | ||
from GeneOntologyAnnotation as gga , BiologicalEntity as be, ontologyterm as ot, | ||
species as spec | ||
where gga.singlegene_id = be.id | ||
and be.taxon_id = spec.taxon_id | ||
and spec.displayname = :speciesName | ||
and gga.goterm_id = ot.id | ||
"""); | ||
query.setParameter("speciesName", sourceOrganization.getAbbreviation()); | ||
List<Object[]> result = query.getResultList(); | ||
Map<Long, GeneOntologyAnnotationDTO> map = new HashMap<>(); | ||
result.forEach(object -> { | ||
GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO(); | ||
dto.setGeneIdentifier((String) object[1]); | ||
dto.setGoTermCurie((String) object[2]); | ||
map.put((Long) object[0], dto); | ||
}); | ||
return map; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
.../org/alliancegenome/curation_api/interfaces/crud/GeneOntologyAnnotationCrudInterface.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package org.alliancegenome.curation_api.interfaces.crud; | ||
|
||
import jakarta.ws.rs.Consumes; | ||
import jakarta.ws.rs.Path; | ||
import jakarta.ws.rs.Produces; | ||
import jakarta.ws.rs.core.MediaType; | ||
import org.alliancegenome.curation_api.interfaces.base.crud.BaseCreateControllerInterface; | ||
import org.alliancegenome.curation_api.model.entities.CrossReference; | ||
import org.eclipse.microprofile.openapi.annotations.tags.Tag; | ||
|
||
@Path("/gaf") | ||
@Tag(name = "CRUD - GAF") | ||
@Produces(MediaType.APPLICATION_JSON) | ||
@Consumes(MediaType.APPLICATION_JSON) | ||
public interface GeneOntologyAnnotationCrudInterface extends BaseCreateControllerInterface<CrossReference> { | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
...n/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package org.alliancegenome.curation_api.jobs.executors; | ||
|
||
import jakarta.enterprise.context.ApplicationScoped; | ||
import jakarta.inject.Inject; | ||
import lombok.extern.jbosslog.JBossLog; | ||
import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; | ||
import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation; | ||
import org.alliancegenome.curation_api.model.entities.Organization; | ||
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; | ||
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkURLLoad; | ||
import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO; | ||
import org.alliancegenome.curation_api.services.GeneOntologyAnnotationService; | ||
import org.alliancegenome.curation_api.services.OrganizationService; | ||
import org.alliancegenome.curation_api.util.ProcessDisplayHelper; | ||
import org.apache.commons.lang3.StringUtils; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStreamReader; | ||
import java.util.*; | ||
import java.util.stream.Stream; | ||
import java.util.zip.GZIPInputStream; | ||
|
||
@JBossLog | ||
@ApplicationScoped | ||
public class GeneOntologyAnnotationExecutor extends LoadFileExecutor { | ||
|
||
@Inject | ||
GeneOntologyAnnotationService service; | ||
@Inject | ||
OrganizationService organizationService; | ||
|
||
public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException { | ||
|
||
String url = ((BulkURLLoad) bulkLoadFileHistory.getBulkLoad()).getBulkloadUrl(); | ||
|
||
String[] tok = url.split("/"); | ||
String orgAbbrev = tok[tok.length - 1].toUpperCase(); | ||
String abbr = orgAbbrev.split("\\.")[0]; | ||
Organization organization = organizationService.getByAbbr(abbr).getEntity(); | ||
|
||
// curie, List<GO curie> | ||
Map<String, List<String>> uiMap = new HashMap<>(); | ||
Set<String> orgIDs = new HashSet<>(); | ||
GZIPInputStream stream = new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())); | ||
try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) { | ||
Stream<String> lines = br.lines(); | ||
|
||
// Process each line | ||
lines.filter(s -> !s.startsWith("!") && StringUtils.isNotEmpty(s)).forEach(s -> { | ||
String[] token = s.split("\t"); | ||
String orgID = token[0]; | ||
orgIDs.add(orgID); | ||
String modID = token[1]; | ||
String goID = token[4]; | ||
if (abbr.equals(orgID)) { | ||
List<String> goIDs = uiMap.computeIfAbsent(modID, list -> new ArrayList<>()); | ||
goIDs.add(goID); | ||
} | ||
}); | ||
|
||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
|
||
String name = bulkLoadFileHistory.getBulkLoad().getName(); | ||
|
||
Map<Long, GeneOntologyAnnotationDTO> gafMap = service.getGafMap(organization); | ||
List<Long> gafIdsBefore = new ArrayList<>(gafMap.keySet().stream().toList()); | ||
gafIdsBefore.removeIf(Objects::isNull); | ||
|
||
List<Long> geneGoIdsLoaded = new ArrayList<>(); | ||
ProcessDisplayHelper ph = new ProcessDisplayHelper(); | ||
ph.addDisplayHandler(loadProcessDisplayService); | ||
List<GeneOntologyAnnotationDTO> dtos = uiMap.entrySet() | ||
.stream() | ||
.map(entry -> entry.getValue().stream().map(goID -> { | ||
GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO(); | ||
dto.setGeneIdentifier(abbr + ":" + entry.getKey()); | ||
dto.setGoTermCurie(goID); | ||
return dto; | ||
}).toList()).flatMap(Collection::stream).toList(); | ||
|
||
ph.startProcess(name, dtos.size()); | ||
for (GeneOntologyAnnotationDTO modID : dtos) { | ||
Long geneID = service.getGeneID(modID, abbr); | ||
if (geneID != null) { | ||
GeneOntologyAnnotation newGaf = service.insert(modID, abbr).getEntity(); | ||
if (newGaf != null) { | ||
geneGoIdsLoaded.add(newGaf.getId()); | ||
bulkLoadFileHistory.incrementCompleted(); | ||
} else { | ||
bulkLoadFileHistory.incrementSkipped(); | ||
} | ||
} else { | ||
addException(bulkLoadFileHistory, new ObjectUpdateException.ObjectUpdateExceptionData(modID, "Could not find gene " + modID.getGeneIdentifier(), null)); | ||
bulkLoadFileHistory.incrementFailed(); | ||
} | ||
ph.progressProcess(); | ||
} | ||
bulkLoadFileHistory.setTotalCount(dtos.size()); | ||
runCleanup(service, bulkLoadFileHistory, abbr, gafIdsBefore, geneGoIdsLoaded, "GAF Load"); | ||
ph.finishProcess(); | ||
updateHistory(bulkLoadFileHistory); | ||
|
||
bulkLoadFileHistory.finishLoad(); | ||
updateHistory(bulkLoadFileHistory); | ||
updateExceptions(bulkLoadFileHistory); | ||
} | ||
|
||
} |
27 changes: 27 additions & 0 deletions
27
src/main/java/org/alliancegenome/curation_api/model/entities/GeneOntologyAnnotation.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package org.alliancegenome.curation_api.model.entities; | ||
|
||
import com.fasterxml.jackson.annotation.JsonTypeName; | ||
import jakarta.persistence.Entity; | ||
import jakarta.persistence.ManyToOne; | ||
import lombok.Data; | ||
import lombok.EqualsAndHashCode; | ||
import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; | ||
import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; | ||
import org.alliancegenome.curation_api.model.entities.base.AuditedObject; | ||
import org.alliancegenome.curation_api.model.entities.ontology.GOTerm; | ||
import org.eclipse.microprofile.openapi.annotations.media.Schema; | ||
|
||
@Entity | ||
@Data | ||
@EqualsAndHashCode | ||
@Schema(name = "Gene_Disease_Annotation", description = "Annotation class representing a gene disease annotation") | ||
@JsonTypeName("GeneOntologyAnnotation") | ||
@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE) | ||
public class GeneOntologyAnnotation extends AuditedObject { | ||
|
||
@ManyToOne | ||
private GOTerm goTerm; | ||
@ManyToOne | ||
private Gene singleGene; | ||
|
||
} |
19 changes: 19 additions & 0 deletions
19
...main/java/org/alliancegenome/curation_api/model/ingest/dto/GeneOntologyAnnotationDTO.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package org.alliancegenome.curation_api.model.ingest.dto; | ||
|
||
import lombok.Data; | ||
import lombok.EqualsAndHashCode; | ||
import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; | ||
import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; | ||
import org.alliancegenome.curation_api.model.entities.Annotation; | ||
|
||
@Data | ||
@EqualsAndHashCode(callSuper = false) | ||
@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {Annotation.class}) | ||
public class GeneOntologyAnnotationDTO { | ||
|
||
private String geneIdentifier; | ||
|
||
private String goTermCurie; | ||
|
||
|
||
} |
Oops, something went wrong.