Skip to content

Commit

Permalink
DATAOPS-779: Fix undetermined percentage handler (#117)
Browse files Browse the repository at this point in the history
* Add support for NaN PhiX values to the %Undetermined handler

* Add .vscode to gitignore and bump version

* Add new miseq testdata with index reads

* Use MiSeq dataset with index in interop parser tests

* Remove MiSeqDemo

* Restructure mean phix calculation to avoid numpy warning

* Add pull requests as GHA trigger

* Collect Receiver metrics in a dict in the interop parser test
  • Loading branch information
matrulda authored Sep 12, 2024
1 parent dfba84e commit 5f91e1b
Show file tree
Hide file tree
Showing 46 changed files with 30,230 additions and 151 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Run Unit Tests

on: [push]
on: [push, pull_request]

jobs:
build:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ dist/
.python-env/
.coverage
.cache
.vscode/
2 changes: 1 addition & 1 deletion checkQC/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

__version__ = "4.0.3"
__version__ = "4.0.5-rc1"
16 changes: 8 additions & 8 deletions checkQC/handlers/undetermined_percentage_handler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

from collections import defaultdict
import numpy as np

from checkQC.handlers.qc_handler import QCHandler, QCErrorFatal, QCErrorWarning
from checkQC.parsers.stats_json_parser import StatsJsonParser
Expand Down Expand Up @@ -33,13 +34,12 @@ def collect(self, signal):
self.phix_aligned[value["lane"]][value["read"]] = value["percent_phix"]

def _compute_mean_percentage_phix_aligned_for_lanes(self):
lane_and_mean_percentage_phix_aliged = {}
lane_and_mean_percentage_phix_aligned = {}
for lane, reads in self.phix_aligned.items():
mean = 0
for read, value in reads.items():
mean += value / len(reads)
lane_and_mean_percentage_phix_aliged[lane] = mean
return lane_and_mean_percentage_phix_aliged
reads_list = list(reads.values())
mean_phix = 0 if all(np.isnan(reads_list)) else np.nanmean(reads_list)
lane_and_mean_percentage_phix_aligned[lane] = mean_phix
return lane_and_mean_percentage_phix_aligned

def check_qc(self):

Expand Down Expand Up @@ -74,13 +74,13 @@ def create_data_dict(value):

if self.error() != self.UNKNOWN and percentage_undetermined > compute_threshold(self.error()):
yield QCErrorFatal("The percentage of undetermined indexes was"
" to high on lane {}, it was: {:.2f}%".format(lane_nbr,
" too high on lane {}, it was: {:.2f}%".format(lane_nbr,
percentage_undetermined),
ordering=lane_nbr,
data=create_data_dict(self.error()))
elif self.warning() != self.UNKNOWN and percentage_undetermined > compute_threshold(self.warning()):
yield QCErrorWarning("The percentage of undetermined indexes was "
"to high on lane {}, it was: {:.2f}%".format(lane_nbr,
"too high on lane {}, it was: {:.2f}%".format(lane_nbr,
percentage_undetermined),
ordering=lane_nbr,
data=create_data_dict(self.warning()))
Expand Down
5 changes: 0 additions & 5 deletions checkQC/parsers/interop_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,6 @@ def run(self):
lanes = summary.lane_count()

for lane in range(lanes):
# The interop library uses zero based indexing,
#however most people uses read 1/2
# to denote the different reads,
#this enumeration is used to transform from
# zero based indexing to this form. /JD 2017-10-27
for read_nbr in range(summary.size()):
read = summary.at(read_nbr).at(lane)
error_rate = read.error_rate().mean()
Expand Down
9 changes: 5 additions & 4 deletions tests/handlers/test_undetermined_percentage_handler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
import numpy as np

from checkQC.handlers.undetermined_percentage_handler import UndeterminedPercentageHandler

Expand All @@ -17,9 +18,9 @@ def setUp(self):

percentage_phix_key = "percent_phix"
percentage_phix_value_lane_1_read_1 = {"lane": 1, "read": 1, "percent_phix": 1}
percentage_phix_value_lane_1_read_2 = {"lane": 1, "read": 2, "percent_phix": 1}
percentage_phix_value_lane_2_read_1 = {"lane": 2, "read": 1, "percent_phix": 1}
percentage_phix_value_lane_2_read_2 = {"lane": 2, "read": 2, "percent_phix": 1}
percentage_phix_value_lane_1_read_2 = {"lane": 1, "read": 2, "percent_phix": np.nan}
percentage_phix_value_lane_2_read_1 = {"lane": 2, "read": 1, "percent_phix": np.nan}
percentage_phix_value_lane_2_read_2 = {"lane": 2, "read": 2, "percent_phix": np.nan}
undetermined_handler.collect((percentage_phix_key, percentage_phix_value_lane_1_read_1))
undetermined_handler.collect((percentage_phix_key, percentage_phix_value_lane_1_read_2))
undetermined_handler.collect((percentage_phix_key, percentage_phix_value_lane_2_read_1))
Expand All @@ -37,7 +38,7 @@ def test_all_is_fine(self):
self.assertEqual(errors_and_warnings, [])

def test_warning(self):
qc_config = {'name': 'UndeterminedPercentageHandler', 'error': 2, 'warning': 1}
qc_config = {'name': 'UndeterminedPercentageHandler', 'error': 3, 'warning': 1}
self.set_qc_config(qc_config)
errors_and_warnings = list(self.undetermined_handler.check_qc())
self.assertEqual(len(errors_and_warnings), 2)
Expand Down
106 changes: 59 additions & 47 deletions tests/parsers/test_interop_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,93 +15,105 @@ class TestInteropParser(unittest.TestCase):

class Receiver(object):
def __init__(self):
self.error_rate_values = []
self.percent_q30_values = []
self.percent_q30_per_cycle = []
self.metrics = {'error_rate': [],
'percent_q30': [],
'percent_q30_per_cycle': [],
'percent_phix': [],
}
self.subscriber = self.subscribe()
next(self.subscriber)

def subscribe(self):
while True:
interop_stat = yield
key = list(interop_stat)[0]
if key == "error_rate":
self.error_rate_values.append(interop_stat)
if key == "percent_q30":
self.percent_q30_values.append(interop_stat)
if key == "percent_q30_per_cycle":
self.percent_q30_per_cycle.append(interop_stat)
self.metrics[key].append(interop_stat)

def send(self, value):
self.subscriber.send(value)

runfolder = os.path.join(os.path.dirname(__file__), "..",
runfolder = os.path.join(os.path.dirname(__file__), "..",
"resources",
"MiSeqDemo")
interop_parser = InteropParser(runfolder=runfolder,
"230825_M04034_0043_000000000-L6NVV")
interop_parser = InteropParser(runfolder=runfolder,
parser_configurations=None)
subscriber = Receiver()
interop_parser.add_subscribers(subscriber)
interop_parser.run()

def test_read_error_rate(self):
self.assertListEqual(self.subscriber.error_rate_values,
[('error_rate',
{'lane': 1,
'read': 1,
'error_rate': 1.5317546129226685}),
('error_rate',
{'lane': 1,
'read': 2,
'error_rate': 1.9201501607894897})])

error_rates = [x[1]['error_rate'] for x in self.subscriber.metrics['error_rate']]
self.assertEqual(error_rates[0], 0.587182343006134)
self.assertTrue(np.isnan(error_rates[1]))
self.assertTrue(np.isnan(error_rates[2]))
self.assertEqual(error_rates[3], 0.8676796555519104)

def test_percent_phix(self):
phix = [x[1]['percent_phix'] for x in self.subscriber.metrics['percent_phix']]
self.assertEqual(phix[0], 15.352058410644531)
self.assertTrue(np.isnan(phix[1]))
self.assertTrue(np.isnan(phix[2]))
self.assertEqual(phix[3], 14.5081205368042)

def test_percent_q30(self):
self.assertListEqual(self.subscriber.percent_q30_values,
[('percent_q30',
{'lane': 1,
'read': 1,
'percent_q30': 93.42070007324219,
self.assertListEqual(self.subscriber.metrics['percent_q30'],
[('percent_q30',
{'lane': 1,
'read': 1,
'percent_q30': 95.3010025024414,
'is_index_read': False}),
('percent_q30',
{'lane': 1,
'read': 2,
'percent_q30': 84.4270248413086,
('percent_q30',
{'lane': 1,
'read': 2,
'percent_q30': 82.97042846679688,
'is_index_read': True}),
('percent_q30',
{'lane': 1,
'read': 3,
'percent_q30': 97.44789123535156,
'is_index_read': True}),
('percent_q30',
{'lane': 1,
'read': 4,
'percent_q30': 90.55824279785156,
'is_index_read': False})])
def test_percent_q30_per_cycle(self):
percent_q30_per_cycle = self.subscriber.percent_q30_per_cycle

def test_percent_q30_per_cycle_subscriber_output(self):
percent_q30_per_cycle = self.subscriber.metrics['percent_q30_per_cycle']
self.assertEqual(percent_q30_per_cycle[0][1]['read'], 1)
self.assertAlmostEqual(
percent_q30_per_cycle[0][1]['percent_q30_per_cycle'][10],
98.41526794433594
96.68322,
places=5,
)

self.assertEqual(percent_q30_per_cycle[1][1]['read'], 2)
self.assertTrue(percent_q30_per_cycle[1][1]['is_index_read'])
self.assertAlmostEqual(
percent_q30_per_cycle[1][1]['percent_q30_per_cycle'][10],
95.20341491699219
percent_q30_per_cycle[1][1]['percent_q30_per_cycle'][1],
80.69179,
places=5,
)

def test_get_percent_q30_per_cycle(self):
q_metrics = imaging(self.runfolder,
valid_to_load=['Q'])

percent_q30_per_cycle = InteropParser.get_percent_q30_per_cycle(
q_metrics=q_metrics,
lane_nr=0,
lane_nr=0,
read_nr=0,
is_index_read=False,
)

expected_out = {
6: 98.76343,
48: 97.841576,
90: 96.81421,
132: 95.90264,
174: 94.69448,
216: 91.90525,
258: 87.162094,
6: 97.17214,
18: 97.1332,
25: 97.38965,
50: 96.62786,
75: 96.30572,
100: 94.63465,
136: 92.64536,
}

#Select cycles from the expected_out-dict.
Expand Down
Loading

0 comments on commit 5f91e1b

Please sign in to comment.