Skip to content

Commit

Permalink
Added Called Legacy Segment code (#5115)
Browse files Browse the repository at this point in the history
- Added functionality to generate legacy format (IGV-compatible) seg files automatically when calling copy ratio segments.  Closes #5114 
- Updates the WDL as well to expose the new output files.
  • Loading branch information
LeeTL1220 authored Aug 28, 2018
1 parent fea2c8d commit 63d4287
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 4 deletions.
3 changes: 3 additions & 0 deletions scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ workflow CNVSomaticPairWorkflow {
File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters
File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters
File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments
File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments
File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot
File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot
File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD
Expand All @@ -472,6 +473,7 @@ workflow CNVSomaticPairWorkflow {
File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters
File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters
File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments
File? called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments
File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot
File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot
File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD
Expand Down Expand Up @@ -673,6 +675,7 @@ task CallCopyRatioSegments {

output {
File called_copy_ratio_segments = "${entity_id}.called.seg"
File called_copy_ratio_legacy_segments = "${entity_id}.called.igv.seg"
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.broadinstitute.hellbender.tools.copynumber;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.io.FilenameUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
Expand All @@ -9,10 +11,14 @@
import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup;
import org.broadinstitute.hellbender.tools.copynumber.caller.SimpleCopyRatioCaller;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledLegacySegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.File;
import java.util.stream.Collectors;

/**
* Calls copy-ratio segments as amplified, deleted, or copy-number neutral.
Expand Down Expand Up @@ -75,7 +81,7 @@ public final class CallCopyRatioSegments extends CommandLineProgram {
public static final String NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME = "neutral-segment-copy-ratio-upper-bound";
public static final String OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "outlier-neutral-segment-copy-ratio-z-score-threshold";
public static final String CALLING_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "calling-copy-ratio-z-score-threshold";

public static final String LEGACY_SEGMENTS_FILE_SUFFIX = ".igv.seg";
@Argument(
doc = "Input file containing copy-ratio segments (.cr.seg output of ModelSegments).",
fullName = StandardArgumentDefinitions.INPUT_LONG_NAME,
Expand Down Expand Up @@ -138,6 +144,25 @@ protected Object doWork() {
.makeCalls();
calledCopyRatioSegments.write(outputCalledCopyRatioSegmentsFile);

// Write an IGV compatible collection
final CalledLegacySegmentCollection legacySegmentCollection = createCalledLegacySegmentCollection(calledCopyRatioSegments);
legacySegmentCollection.write(createCalledLegacyOutputFilename(outputCalledCopyRatioSegmentsFile));

return "SUCCESS";
}

@VisibleForTesting
public static File createCalledLegacyOutputFilename(final File calledCopyRatioBaseFilename) {
return new File(FilenameUtils.removeExtension(calledCopyRatioBaseFilename.getAbsolutePath()) + LEGACY_SEGMENTS_FILE_SUFFIX);
}

private static CalledLegacySegmentCollection createCalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection segments) {
return new CalledLegacySegmentCollection(segments.getMetadata(), segments.getRecords().stream()
.map(r -> convert(r, segments.getMetadata().getSampleName())).collect(Collectors.toList()));
}

private static CalledLegacySegment convert(final CalledCopyRatioSegment segment, final String sampleName) {
return new CalledLegacySegment(sampleName, segment.getInterval(), segment.getNumPoints(), segment.getMeanLog2CopyRatio(), segment.getCall());
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.collections;

import org.apache.commons.lang3.StringUtils;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.stream.Stream;

/**
* Represents a CBS-style segmentation to enable IGV-compatible plotting.
*
* IGV ignores column headers and requires that no other headers are present.
* We use the conventional CBS-style column headers (which includes the sample name)
* and suppress the SAM-style metadata header (which breaks the contract for construction from input files).
* See <a href="http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS">
* http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS</a>
* and <a href="https://software.broadinstitute.org/software/igv/SEG">
* https://software.broadinstitute.org/software/igv/SEG</a>.
*/
public final class CalledLegacySegmentCollection extends AbstractSampleLocatableCollection<CalledLegacySegment> {
//note to developers: repeat the column headers in Javadoc so that they are viewable when linked
/**
* Sample, Chromosome, Start, End, Num_Probes, Call, Segment_Mean
*/
enum CalledLegacySegmentTableColumn {
SAMPLE("Sample"),
CHROMOSOME("Chromosome"),
START("Start"),
END("End"),
NUM_PROBES("Num_Probes"),
CALL("Call"),
SEGMENT_MEAN("Segment_Mean");

private final String columnName;

CalledLegacySegmentTableColumn(final String columnName) {
this.columnName = columnName;
}

static final TableColumnCollection COLUMNS = new TableColumnCollection(
Stream.of(values()).map(c -> c.columnName).toArray());
}

private static final Function<DataLine, CalledLegacySegment> CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION = dataLine -> {
final String sampleName = dataLine.get(CalledLegacySegmentTableColumn.SAMPLE.columnName);
final String contig = dataLine.get(CalledLegacySegmentTableColumn.CHROMOSOME.columnName);
final int start = dataLine.getInt(CalledLegacySegmentTableColumn.START.columnName);
final int end = dataLine.getInt(CalledLegacySegmentTableColumn.END.columnName);
final int numProbes = dataLine.getInt(CalledLegacySegmentTableColumn.NUM_PROBES.columnName);
final double segmentMean = dataLine.getDouble(CalledLegacySegmentTableColumn.SEGMENT_MEAN.columnName);
final String callOutputString = dataLine.get(CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn.CALL);
final CalledCopyRatioSegment.Call call = Arrays.stream(CalledCopyRatioSegment.Call.values())
.filter(c -> c.getOutputString().equals(callOutputString)).findFirst().orElseThrow(
() -> new UserException("Attempting to read an invalid value for " +
CalledLegacySegmentTableColumn.CALL +": " + callOutputString +
". Valid values are " + StringUtils.join(CalledCopyRatioSegment.Call.values(), ", ")
));
final SimpleInterval interval = new SimpleInterval(contig, start, end);
return new CalledLegacySegment(sampleName, interval, numProbes, segmentMean, call);
};

private static final BiConsumer<CalledLegacySegment, DataLine> CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER = (calledLegacySegment, dataLine) ->
dataLine.append(calledLegacySegment.getSampleName())
.append(calledLegacySegment.getContig())
.append(calledLegacySegment.getStart())
.append(calledLegacySegment.getEnd())
.append(calledLegacySegment.getNumProbes())
.append(calledLegacySegment.getCall().getOutputString())
.append(formatDouble(calledLegacySegment.getSegmentMean()));

public CalledLegacySegmentCollection(final SampleLocatableMetadata metadata,
final List<CalledLegacySegment> calledLegacySegments) {
super(metadata, calledLegacySegments, CalledLegacySegmentTableColumn.COLUMNS, CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
}


public CalledLegacySegmentCollection(final File inputFile) {
super(inputFile, CopyRatioSegmentCollection.CopyRatioSegmentTableColumn.COLUMNS, CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
}

// output of SAM-style header is suppressed
@Override
public void write(final File outputFile) {
try (final RecordWriter recordWriter = new RecordWriter(new FileWriter(outputFile, true))) {
recordWriter.writeAllRecords(getRecords());
} catch (final IOException e) {
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ enum LegacySegmentTableColumn {
.append(formatDouble(LegacySegment.getSegmentMean()));

public LegacySegmentCollection(final SampleLocatableMetadata metadata,
final List<LegacySegment> LegacySegments) {
super(metadata, LegacySegments, LegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
final List<LegacySegment> legacySegments) {
super(metadata, legacySegments, LegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
}

// output of SAM-style header is suppressed
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.records;

import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

public class CalledLegacySegment extends LegacySegment {

private final CalledCopyRatioSegment.Call call;

public CalledLegacySegment(final String sampleName, final SimpleInterval interval, final int numProbes,
final double segmentMean,
final CalledCopyRatioSegment.Call call) {
super(sampleName, interval, numProbes, segmentMean);
Utils.nonNull(call, "Cannot initialize a called legacy segment with a null call.");
this.call = call;
}

public CalledCopyRatioSegment.Call getCall() {
return call;
}

@Override
public final boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
final CalledLegacySegment that = (CalledLegacySegment) o;
return call == that.call;
}

@Override
public final int hashCode() {
int result = super.hashCode();
result = 31 * result + call.hashCode();
return result;
}

@Override
public final String toString() {
return "CalledLegacySegment{" +
"interval=" + getInterval() +
", numPoints=" + getNumProbes() +
", meanLog2CopyRatio=" + getSegmentMean() +
", call=" + call +
'}';
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package org.broadinstitute.hellbender.tools.copynumber;

import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection;
import org.testng.Assert;
import org.testng.annotations.Test;

Expand All @@ -29,5 +30,13 @@ public void testCallSegments() {
Assert.assertEquals(calledCopyRatioSegments.getMetadata(), copyRatioSegments.getMetadata());
Assert.assertEquals(calledCopyRatioSegments.getIntervals(), copyRatioSegments.getIntervals());
Assert.assertEquals(calledCopyRatioSegments.getRecords().stream().map(s -> s.getCall().getOutputString()).toArray(), new String[] {"+", "-", "0", "0"});

// Test writing the legacy format. Note that reading cannot be done through the CNV tools, since the header has been stripped away.
final File legacySegmentFile = CallCopyRatioSegments.createCalledLegacyOutputFilename(outputFile);
Assert.assertTrue(legacySegmentFile.exists());
Assert.assertTrue(legacySegmentFile.length() > 0);

final AnnotatedIntervalCollection annotatedIntervalCollection = AnnotatedIntervalCollection.create(legacySegmentFile.toPath(), null);
Assert.assertEquals(annotatedIntervalCollection.getRecords().size(), 4);
}
}

0 comments on commit 63d4287

Please sign in to comment.