Skip to content

Commit

Permalink
Close #486 by fixing a double counting bug that it introduced.
Browse files Browse the repository at this point in the history
  • Loading branch information
donkirkby committed Dec 2, 2019
1 parent 62af69d commit b8ec9e1
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 9 deletions.
9 changes: 4 additions & 5 deletions micall/core/aln2counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,10 @@ def combine_reports(self):
for region, report in self.reports.items():
old_report = old_reports[region]
for i, report_amino in enumerate(report):
if i < len(old_report):
old_report[i].seed_amino.add(report_amino.seed_amino)
else:
report_amino.seed_amino.consensus_nuc_index = None
old_report.append(report_amino)
while len(old_report) <= i:
old_report.append(ReportAmino(SeedAmino(None),
len(old_report)+1))
old_report[i].seed_amino.add(report_amino.seed_amino)
self.reports.clear()

def write_amino_report(self, amino_writer, reports, seed, coverage_summary=None):
Expand Down
6 changes: 5 additions & 1 deletion micall/g2p/fastq_g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def parse_args():
parser.add_argument('aligned_csv',
type=argparse.FileType('w'),
help='<output> CSV containing mapped reads aligned to HIV seed')
parser.add_argument('merged_contigs_csv',
type=argparse.FileType('w'),
help='<output> CSV containing amplicon contigs based on length')
return parser.parse_args()


Expand Down Expand Up @@ -546,7 +549,8 @@ def main():
aligned_csv=args.aligned_csv,
min_count=DEFAULT_MIN_COUNT,
min_valid=MIN_VALID,
min_valid_percent=MIN_VALID_PERCENT)
min_valid_percent=MIN_VALID_PERCENT,
merged_contigs_csv=args.merged_contigs_csv)


if __name__ == '__main__':
Expand Down
98 changes: 98 additions & 0 deletions micall/tests/test_aln2counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,104 @@ def testMultiplePrefixNucleotideReport(self):
self.detail_report_file.getvalue())
self.assertMultiLineEqual(expected_text, self.report_file.getvalue())

def testMultiplePrefixNucleotideReportOverlappingRegions(self):
""" Assemble counts from two contigs when coordinate regions overlap.
Contig 1-R1 AAATTT -> KF
Contig 2-R1 TTTAGG -> FR
Contig 1 and 2 should combine into R1 with KFR, but
"""
self.report.projects.load(StringIO("""\
{
"projects": {
"R1": {
"max_variants": 0,
"regions": [
{
"coordinate_region": "R1",
"seed_region_names": ["R1-seed"]
},
{
"coordinate_region": "R1-expanded",
"seed_region_names": ["R1-seed"]
}
]
}
},
"regions": {
"R1-seed": {
"is_nucleotide": true,
"reference": [
"AAATTTAGG"
]
},
"R1": {
"is_nucleotide": false,
"reference": [
"KFR"
]
},
"R1-expanded": {
"is_nucleotide": false,
"reference": [
"GPKFREH"
]
}
}
}
"""))
# refname,qcut,rank,count,offset,seq
aligned_reads1 = self.prepareReads("1-R1-seed,15,0,5,0,AAATTT")
aligned_reads2 = self.prepareReads("2-R1-seed,15,0,2,0,TTTAGG")

expected_text = """\
seed,region,q-cutoff,query.nuc.pos,refseq.nuc.pos,\
A,C,G,T,N,del,ins,clip,v3_overlap,coverage
R1-seed,R1,15,,1,5,0,0,0,0,0,0,0,0,5
R1-seed,R1,15,,2,5,0,0,0,0,0,0,0,0,5
R1-seed,R1,15,,3,5,0,0,0,0,0,0,0,0,5
R1-seed,R1,15,,4,0,0,0,7,0,0,0,0,0,7
R1-seed,R1,15,,5,0,0,0,7,0,0,0,0,0,7
R1-seed,R1,15,,6,0,0,0,7,0,0,0,0,0,7
R1-seed,R1,15,,7,2,0,0,0,0,0,0,0,0,2
R1-seed,R1,15,,8,0,0,2,0,0,0,0,0,0,2
R1-seed,R1,15,,9,0,0,2,0,0,0,0,0,0,2
R1-seed,R1-expanded,15,,1,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,2,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,3,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,4,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,5,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,6,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,7,5,0,0,0,0,0,0,0,0,5
R1-seed,R1-expanded,15,,8,5,0,0,0,0,0,0,0,0,5
R1-seed,R1-expanded,15,,9,5,0,0,0,0,0,0,0,0,5
R1-seed,R1-expanded,15,,10,0,0,0,7,0,0,0,0,0,7
R1-seed,R1-expanded,15,,11,0,0,0,7,0,0,0,0,0,7
R1-seed,R1-expanded,15,,12,0,0,0,7,0,0,0,0,0,7
R1-seed,R1-expanded,15,,13,2,0,0,0,0,0,0,0,0,2
R1-seed,R1-expanded,15,,14,0,0,2,0,0,0,0,0,0,2
R1-seed,R1-expanded,15,,15,0,0,2,0,0,0,0,0,0,2
R1-seed,R1-expanded,15,,16,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,17,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,18,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,19,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,20,0,0,0,0,0,0,0,0,0,0
R1-seed,R1-expanded,15,,21,0,0,0,0,0,0,0,0,0,0
"""

self.report.write_nuc_header(self.report_file)
self.report.write_nuc_detail_header(self.detail_report_file)
self.report.read(aligned_reads1)
self.report.write_nuc_detail_counts()
self.report.combine_reports()
self.report.read(aligned_reads2)
self.report.write_nuc_detail_counts()
self.report.combine_reports()
self.report.write_nuc_counts()

self.assertMultiLineEqual(expected_text, self.report_file.getvalue())

def testContigCoverageReport(self):
""" Assemble counts from three contigs to two references.
Expand Down
15 changes: 12 additions & 3 deletions release_test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def report_source_versions(runs):
version = os.path.basename(run.source_path)
version_runs[version].append(run.source_path)
yield run
max_count = max(len(paths) for paths in version_runs.values())
if version_runs:
max_count = max(len(paths) for paths in version_runs.values())
else:
max_count = 0
for version, paths in sorted(version_runs.items()):
if len(paths) == max_count:
print(version, max_count)
Expand Down Expand Up @@ -534,7 +537,7 @@ def main():
samples,
chunksize=50)
scenario_summaries = defaultdict(list)
i = None
i = 0
all_consensus_distances = []
report_count = 0
for i, (report, scenarios, consensus_distances) in enumerate(results, 1):
Expand All @@ -556,6 +559,13 @@ def main():
print(*summary, end='.\n')
print(body, end='')

report_distances(all_consensus_distances)
print('Finished {} samples.'.format(i))


def report_distances(all_consensus_distances):
if not all_consensus_distances:
return
distance_data = pd.DataFrame(all_consensus_distances)
non_zero_distances = distance_data[distance_data['distance'] != 0]
region_names = sorted(non_zero_distances['region'].unique())
Expand All @@ -569,7 +579,6 @@ def main():
'consensus_diffs_{}.svg'.format(page_num),
'Consensus Differences Between Previous and v' + MICALL_VERSION,
'pct_diff')
print('Finished {} samples.'.format(i))


if __name__ == '__main__':
Expand Down

0 comments on commit b8ec9e1

Please sign in to comment.