Skip to content

Commit

Permalink
Merge pull request #1476 from phac-nml/filesystem-cleanup-delete-file…
Browse files Browse the repository at this point in the history
…-from-sample

Delete sequence files from file storage when removed from sample
  • Loading branch information
ericenns authored Jun 1, 2023
2 parents cf9eaa6 + c753424 commit 9b07817
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 75 deletions.
14 changes: 8 additions & 6 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@

# Changelog
## [23.01.3] - 2023/05/09
* [Developer]: Fixed issue with metadata uploader removing exiting data. See [PR 1489](https://github.com/phac-nml/irida/pull/1489)
## [Unreleased]
* [Developer]: Added functionality to delete sequence files from file system when a sequence run is removed. [See PR 1468](https://github.com/phac-nml/irida/pull/1468)
* [Developer]: Added script to do initial cleanup of sequence files from file system. [See PR 1469](https://github.com/phac-nml/irida/pull/1469)
* [UI]: Add identifier to project drop-down on project synchronization page. See [PR 1474](https://github.com/phac-nml/irida/pull/1474)
* [Developer]: Added functionality to delete sequence files from file storage when removed from sample. [See PR 1476](https://github.com/phac-nml/irida/pull/1476)

## [23.01.3] - 2023/05/09
* [Developer]: Fixed issue with metadata uploader removing existing data. See [PR 1489](https://github.com/phac-nml/irida/pull/1489)

## [23.01.2] - 2023/04/17
* [UI]: Fixed bug that caused all metadata fields to be removed when single field was removed from a template. See [PR 1482](https://github.com/phac-nml/irida/pull/1482)
* [Developer]: Fixed bug which allowed duplicated entries in the user_group_project table which prevented the user group from being removed. Fixed bug which was preventing analyses with `html` file outputs from completing. See [PR 1483](https://github.com/phac-nml/irida/pull/1483)
Expand All @@ -29,9 +34,6 @@
* [Developer]: Deprecated "/api/projects/{projectId}/samples/bySequencerId/{seqeuncerId}" in favour of "/api/projects/{projectId}/samples/bySampleName", which accepts a json property "sampleName"
* [Developer]: Fixed bug in setting the `default_sequencing_object` and `default_genome_assembly` to `NULL` for a sample when the default sequencing object or genome assembly were removed. [See PR 1466](https://github.com/phac-nml/irida/pull/1466)
* [Developer]: Fixed bug preventing a `sample` with an analysis submission from being deleted. [See PR 1467](https://github.com/phac-nml/irida/pull/1467)
* [Developer]: Added functionality to delete sequence files from file system when a sequence run is removed. [See PR 1468](https://github.com/phac-nml/irida/pull/1468)
* [Developer]: Added script to do initial cleanup of sequence files from file system. [See PR 1469](https://github.com/phac-nml/irida/pull/1469)
* [UI]: Add identifier to project drop-down on project synchronization page. See [PR 1474](https://github.com/phac-nml/irida/pull/1474)

## [22.09.7] - 2023/01/24
* [UI]: Fixed bugs on NCBI Export page preventing the NCBI `submission.xml` file from being properly written. See [PR 1451](https://github.com/phac-nml/irida/pull/1451)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
import ca.corefacility.bioinformatics.irida.model.IridaThing;

/**
* Class storing when 2 {@link SequencingObject}s are concatenated into a new
* {@link SequencingObject}
* Class storing when 2 {@link SequencingObject}s are concatenated into a new {@link SequencingObject}
*/
@Entity
@Table(name = "sequence_concatenation")
Expand All @@ -30,14 +29,22 @@ public class SequenceConcatenation implements IridaThing {
private Date createdDate;

@ManyToMany
@JoinTable(joinColumns = @JoinColumn(name="sequence_concatenation_id"))
@JoinTable(joinColumns = @JoinColumn(name = "sequence_concatenation_id"))
@NotNull
private final List<SequencingObject> sources;

@OneToOne
@NotNull
private final SequencingObject concatenated;

/**
 * No-argument constructor required by Hibernate. Both associations are initialised to {@code null}.
 */
public SequenceConcatenation() {
    this.concatenated = null;
    this.sources = null;
}

public SequenceConcatenation(SequencingObject concatenated, List<SequencingObject> sources) {
this.concatenated = concatenated;
this.sources = sources;
Expand All @@ -58,4 +65,15 @@ public Long getId() {
return id;
}

/**
 * Get the {@link SequencingObject}s that were used as input for this concatenation.
 *
 * @return the list of source {@link SequencingObject}s held by this entity
 */
public List<SequencingObject> getSources() {
    return this.sources;
}

/**
 * Get the {@link SequencingObject} that resulted from this concatenation.
 *
 * @return the concatenated {@link SequencingObject} held by this entity
 */
public SequencingObject getConcatenated() {
    return this.concatenated;
}

/**
 * Remove from the source list every entry whose id equals the id of the given {@link SequencingObject}.
 * Entries are matched by id rather than by object identity.
 *
 * @param object the {@link SequencingObject} that should no longer be listed as a source
 */
public void removeSource(SequencingObject object) {
    this.sources.removeIf(existing -> existing.getId().equals(object.getId()));
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
package ca.corefacility.bioinformatics.irida.repositories.sequencefile;

import java.util.Set;

import org.springframework.data.jpa.repository.Query;

import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceConcatenation;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequencingObject;
import ca.corefacility.bioinformatics.irida.repositories.IridaJpaRepository;

/**
 * Spring Data repository giving access to persisted {@link SequenceConcatenation} records.
 */
public interface SequenceConcatenationRepository extends IridaJpaRepository<SequenceConcatenation, Long> {

    /**
     * Look up the {@link SequenceConcatenation} whose concatenated result is the given
     * {@link SequencingObject}.
     *
     * @param sequencingObject the {@link SequencingObject} produced by a concatenation
     * @return the matching {@link SequenceConcatenation}
     */
    @Query("select sc from SequenceConcatenation sc where sc.concatenated = ?1")
    SequenceConcatenation findConcatenatedSequencingObject(SequencingObject sequencingObject);

    /**
     * Look up every {@link SequenceConcatenation} that lists the given {@link SequencingObject} among its
     * sources.
     *
     * @param sequencingObject the {@link SequencingObject} to search for in the source lists
     * @return the set of {@link SequenceConcatenation}s that have the object as a source
     */
    @Query("select sc from SequenceConcatenation sc where ?1 IN elements(sc.sources)")
    Set<SequenceConcatenation> findConcatenatedSequencingObjectsBySource(SequencingObject sequencingObject);

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.transaction.annotation.Transactional;

import ca.corefacility.bioinformatics.irida.model.run.SequencingRun;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequencingObject;
import ca.corefacility.bioinformatics.irida.model.workflow.submission.AnalysisSubmission;
import ca.corefacility.bioinformatics.irida.repositories.IridaJpaRepository;
import org.springframework.transaction.annotation.Transactional;

/**
* Repository for storing and retrieving {@link SequencingObject}s
Expand All @@ -27,8 +27,7 @@ public interface SequencingObjectRepository extends IridaJpaRepository<Sequencin
public Set<SequencingObject> findSequencingObjectsForSequencingRun(SequencingRun sequencingRun);

/**
* Get the {@link SequencingObject}s associated with a given
* {@link AnalysisSubmission}
* Get the {@link SequencingObject}s associated with a given {@link AnalysisSubmission}
*
* @param analysisSubmission the {@link AnalysisSubmission}
* @return the set of associated {@link SequencingObject}s
Expand All @@ -37,7 +36,8 @@ public interface SequencingObjectRepository extends IridaJpaRepository<Sequencin
public Set<SequencingObject> findSequencingObjectsForAnalysisSubmission(AnalysisSubmission analysisSubmission);

/**
* Get all {@link SequencingObject}s with the given {@link ca.corefacility.bioinformatics.irida.model.sequenceFile.SequencingObject.ProcessingState}
* Get all {@link SequencingObject}s with the given
* {@link ca.corefacility.bioinformatics.irida.model.sequenceFile.SequencingObject.ProcessingState}
*
* @param processingState the state to get files for
* @return a list of {@link SequencingObject}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import ca.corefacility.bioinformatics.irida.model.sample.SampleSequencingObjectJoin;
import ca.corefacility.bioinformatics.irida.model.sample.metadata.MetadataEntry;
import ca.corefacility.bioinformatics.irida.model.sample.metadata.ProjectMetadataResponse;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceConcatenation;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFile;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequencingObject;
import ca.corefacility.bioinformatics.irida.model.user.User;
Expand All @@ -49,12 +50,15 @@
import ca.corefacility.bioinformatics.irida.model.workflow.analysis.AnalysisFastQC;
import ca.corefacility.bioinformatics.irida.model.workflow.submission.AnalysisSubmission;
import ca.corefacility.bioinformatics.irida.repositories.analysis.AnalysisRepository;
import ca.corefacility.bioinformatics.irida.repositories.analysis.submission.AnalysisSubmissionRepository;
import ca.corefacility.bioinformatics.irida.repositories.joins.project.ProjectSampleJoinRepository;
import ca.corefacility.bioinformatics.irida.repositories.joins.sample.SampleGenomeAssemblyJoinRepository;
import ca.corefacility.bioinformatics.irida.repositories.joins.sample.SampleSequencingObjectJoinRepository;
import ca.corefacility.bioinformatics.irida.repositories.sample.MetadataEntryRepository;
import ca.corefacility.bioinformatics.irida.repositories.sample.QCEntryRepository;
import ca.corefacility.bioinformatics.irida.repositories.sample.SampleRepository;
import ca.corefacility.bioinformatics.irida.repositories.sequencefile.SequenceConcatenationRepository;
import ca.corefacility.bioinformatics.irida.repositories.sequencefile.SequenceFileRepository;
import ca.corefacility.bioinformatics.irida.repositories.sequencefile.SequencingObjectRepository;
import ca.corefacility.bioinformatics.irida.repositories.specification.ProjectSampleJoinSpecification;
import ca.corefacility.bioinformatics.irida.repositories.specification.SearchCriteria;
Expand Down Expand Up @@ -91,6 +95,7 @@ public class SampleServiceImpl extends CRUDServiceImpl<Long, Sample> implements
private QCEntryRepository qcEntryRepository;

private SequencingObjectRepository sequencingObjectRepository;
private SequenceConcatenationRepository concatenationRepository;

/**
* Reference to {@link AnalysisRepository}.
Expand All @@ -102,6 +107,8 @@ public class SampleServiceImpl extends CRUDServiceImpl<Long, Sample> implements
private final UserRepository userRepository;

private final MetadataEntryRepository metadataEntryRepository;
private final SequenceFileRepository sequenceFileRepository;
private final AnalysisSubmissionRepository submissionRepository;

/**
* Constructor.
Expand All @@ -111,28 +118,36 @@ public class SampleServiceImpl extends CRUDServiceImpl<Long, Sample> implements
* @param analysisRepository the analysis repository.
* @param ssoRepository The {@link SampleSequencingObjectJoin} repository
* @param sequencingObjectRepository the {@link SequencingObject} repository
* @param concatenationRepository the {@link SequenceConcatenationRepository} repository
* @param qcEntryRepository a repository for storing and reading {@link QCEntry}
* @param sampleGenomeAssemblyJoinRepository A {@link SampleGenomeAssemblyJoinRepository}
* @param userRepository A {@link UserRepository}
* @param metadataEntryRepository A {@link MetadataEntryRepository}
* @param sequenceFileRepository A {@link SequenceFileRepository}
* @param submissionRepository A {@link AnalysisSubmissionRepository}
* @param validator validator.
*/
@Autowired
public SampleServiceImpl(SampleRepository sampleRepository, ProjectSampleJoinRepository psjRepository,
final AnalysisRepository analysisRepository, SampleSequencingObjectJoinRepository ssoRepository,
QCEntryRepository qcEntryRepository, SequencingObjectRepository sequencingObjectRepository,
SequenceConcatenationRepository concatenationRepository,
SampleGenomeAssemblyJoinRepository sampleGenomeAssemblyJoinRepository, UserRepository userRepository,
// NOTE(review): the next line is the pre-change end of the parameter list as shown by the diff view; the two
// lines after it are its replacement, which adds the sequence file and analysis submission repositories.
MetadataEntryRepository metadataEntryRepository, Validator validator) {
MetadataEntryRepository metadataEntryRepository, SequenceFileRepository sequenceFileRepository,
AnalysisSubmissionRepository submissionRepository, Validator validator) {
// Hand the sample repository, the validator and the managed entity class to the CRUD base class.
super(sampleRepository, validator, Sample.class);
// Keep a reference to each collaborating repository for use by the service methods.
this.sampleRepository = sampleRepository;
this.psjRepository = psjRepository;
this.analysisRepository = analysisRepository;
this.ssoRepository = ssoRepository;
this.qcEntryRepository = qcEntryRepository;
this.sequencingObjectRepository = sequencingObjectRepository;
this.concatenationRepository = concatenationRepository;
this.userRepository = userRepository;
this.sampleGenomeAssemblyJoinRepository = sampleGenomeAssemblyJoinRepository;
this.metadataEntryRepository = metadataEntryRepository;
this.sequenceFileRepository = sequenceFileRepository;
this.submissionRepository = submissionRepository;
}

/**
Expand Down Expand Up @@ -363,10 +378,20 @@ public Map<String, List<Long>> getSampleIdsBySampleNameForProjects(List<Long> pr
public void removeSequencingObjectFromSample(Sample sample, SequencingObject object) {
// Look up the join that links the sample to this sequencing object and delete it.
SampleSequencingObjectJoin readObjectForSample = ssoRepository.readObjectForSample(sample, object.getId());
ssoRepository.delete(readObjectForSample);
// NOTE(review): the diff view shows the same condition twice: first the two-line form (before the change),
// then the reformatted three-line form (after the change).
if (sample.getDefaultSequencingObject() != null
&& sample.getDefaultSequencingObject().getId().equals(object.getId())) {
if (sample.getDefaultSequencingObject() != null && sample.getDefaultSequencingObject()
.getId()
.equals(object.getId())) {
// The removed object was the sample's default sequencing object, so clear that reference.
sampleRepository.removeDefaultSequencingObject(sample);
}
// Only delete the sequencing object itself when no analysis submission uses it and it is not part of a
// sequencing run.
Set<AnalysisSubmission> submissions = submissionRepository.findAnalysisSubmissionsForSequencingObject(object);
if (submissions.isEmpty() && object.getSequencingRun() == null) {
// Clean up concatenation records first: drop the object from source lists, then delete the record for
// which it is the concatenated result.
removeSequencingObjectAsConcatenationSource(object);
removeConcatenatedSequencingObjectForSequencingObject(object);
// Delete each sequence file belonging to the object, then the object.
for (SequenceFile file : object.getFiles()) {
sequenceFileRepository.delete(file);
}
sequencingObjectRepository.delete(object);
}
}

/**
Expand Down Expand Up @@ -843,4 +868,32 @@ public Map<Long, Long> getCoverageForSamplesInProject(Project project, List<Long
.collect(HashMap::new, (sampleCoverageMap, sampleCoverageTuple) -> sampleCoverageMap.put(
(Long) sampleCoverageTuple.get(0), (Long) sampleCoverageTuple.get(1)), Map::putAll);
}

/**
 * Detaches the given {@link SequencingObject} from every {@link SequenceConcatenation} that lists it as a
 * source, and saves each modified concatenation. The sequencing object itself is not deleted here.
 * <p>
 * This method is intentionally not annotated with {@code @Transactional}: Spring's proxy-based transaction
 * support is not applied to private methods or to calls made from within the same bean, so the annotation
 * would have no effect. The method runs inside whatever transaction its caller has open.
 *
 * @param object The {@link SequencingObject} to remove from the source lists
 */
private void removeSequencingObjectAsConcatenationSource(SequencingObject object) {
    Set<SequenceConcatenation> concatenations = concatenationRepository.findConcatenatedSequencingObjectsBySource(
            object);
    for (SequenceConcatenation concat : concatenations) {
        // Drop the object from this concatenation's source list, then persist the change.
        concat.removeSource(object);
        concatenationRepository.save(concat);
    }
}

/**
 * Deletes the {@link SequenceConcatenation} record whose concatenated result is the given
 * {@link SequencingObject}, if such a record exists. The sequencing object itself is not deleted here.
 * <p>
 * This method is intentionally not annotated with {@code @Transactional}: Spring's proxy-based transaction
 * support is not applied to private methods or to calls made from within the same bean, so the annotation
 * would have no effect. The method runs inside whatever transaction its caller has open.
 *
 * @param object The {@link SequencingObject} that may be the result of a concatenation
 */
private void removeConcatenatedSequencingObjectForSequencingObject(SequencingObject object) {
    SequenceConcatenation concatenated = concatenationRepository.findConcatenatedSequencingObject(object);
    // Nothing to delete when the object is not the result of a concatenation.
    if (concatenated != null) {
        concatenationRepository.delete(concatenated);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ public Page<ProjectSampleJoin> getSamplesForProjectWithName(Project project, Str
* @param sample {@link Sample} to remove sequences from
* @param object {@link SequencingObject} to remove
*/
public void removeSequencingObjectFromSample(Sample sample, SequencingObject object) throws Exception;
public void removeSequencingObjectFromSample(Sample sample, SequencingObject object);

/**
* Merge multiple samples into one. Merging samples copies the {@link SequenceFile} references from the set of
Expand Down Expand Up @@ -328,7 +328,7 @@ public Page<ProjectSampleJoin> searchAllSamples(String query, final Integer page

/**
* Get Coverage for samples within a project.
*
*
* @param project the {@link Project} to use to calculate the coverage
* @param sampleIds the {@link Sample} ids to get coverage for
* @return a map of sample id to coverage value
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>

<databaseChangeLog xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-3.1.xsd">
</databaseChangeLog>
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-3.1.xsd">
</databaseChangeLog>
Loading

0 comments on commit 9b07817

Please sign in to comment.