Skip to content

Commit

Permalink
Merge pull request #222 from tanaes/fix-amplicon-pooling
Browse files Browse the repository at this point in the history
Fix amplicon pooling (issue #204)
  • Loading branch information
tanaes authored May 4, 2018
2 parents 7b44af9 + e12abd4 commit a085877
Show file tree
Hide file tree
Showing 10 changed files with 575 additions and 379 deletions.
212 changes: 100 additions & 112 deletions labman/db/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def factory(process_id):
'shotgun library prep': LibraryPrepShotgunProcess,
'quantification': QuantificationProcess,
'gDNA normalization': NormalizationProcess,
'compress gDNA plates': GDNAPlateCompressionProcess,
'compressed gDNA plates': GDNAPlateCompressionProcess,
'pooling': PoolingProcess,
'sequencing': SequencingProcess}

Expand Down Expand Up @@ -605,7 +605,7 @@ class GDNAPlateCompressionProcess(Process):
"""
_table = 'qiita.compression_process'
_id_column = 'compression_process_id'
_process_type = "compress gDNA plates"
_process_type = "compressed gDNA plates"

def _compress_plate(self, out_plate, in_plate, row_pad, col_pad, volume=1):
"""Compresses the 96-well in_plate into the 384-well out_plate"""
Expand Down Expand Up @@ -1575,7 +1575,7 @@ class QuantificationProcess(Process):
_process_type = 'quantification'

@staticmethod
def _compute_shotgun_pico_concentration(dna_vals, size=500):
def _compute_pico_concentration(dna_vals, size=500):
"""Computes molar concentration of libraries from library DNA
concentration values.
Expand Down Expand Up @@ -1831,64 +1831,29 @@ def concentrations(self):
(composition_module.Composition.factory(comp_id), r_con, c_con)
for comp_id, r_con, c_con in TRN.execute_fetchindex()]

def compute_concentrations(self, dna_amount=240, min_val=1, max_val=15,
blank_volume=2, size=500):
"""Compute the normalized concentrations
def compute_concentrations(self, size=500):
Compute the normalized library molarity based on PicoGreen DNA
concentration estimates.
Parameters
----------
dna_amount: float, optional
(Amplicon) Total amount of DNA, in ng. Default: 240
min_val: float, optional
(Amplicon) Minimum amount of DNA to normalize to (nM). Default: 1
max_val: float, optional
(Amplicon) Maximum value. Wells above this number will be
excluded (nM). Default: 15
blank_volume: float, optional
(Amplicon) Amount to pool for the blanks (nM). Default: 2.
size: int, optional
(Shotgun) The average library molecule size, in bp.
The average library molecule size, in bp.
"""
concentrations = self.concentrations
layout = concentrations[0][0].container.plate.layout

res = None
if isinstance(concentrations[0][0],
composition_module.LibraryPrep16SComposition):
# Amplicon
sample_concs = np.zeros_like(layout, dtype=float)
is_blank = np.zeros_like(layout, dtype=bool)
for comp, r_conc, _ in concentrations:
well = comp.container
row = well.row - 1
col = well.column - 1
sample_concs[row][col] = r_conc
sc = comp.gdna_composition.sample_composition
is_blank[row][col] = sc.sample_composition_type == 'blank'

res = QuantificationProcess._compute_amplicon_pool_values(
sample_concs, dna_amount)
res[sample_concs < min_val] = min_val
# If there is any sample whose concentration is above the
# user-defined max_value, the decision is to not pool that sample.
# To not pool the sample, define it's volume to 0 and it will not
# get pooled.
res[sample_concs > max_val] = 0
res[is_blank] = blank_volume
elif isinstance(concentrations[0][0],
composition_module.LibraryPrepShotgunComposition):
# Shotgun
sample_concs = np.zeros_like(layout, dtype=float)
for comp, r_conc, _ in concentrations:
well = comp.container
row = well.row - 1
col = well.column - 1
sample_concs[row][col] = r_conc

res = QuantificationProcess._compute_shotgun_pico_concentration(
sample_concs, size)
# No need for else, because if it is not one of the above types
# we don't need to do anything

sample_concs = np.zeros_like(layout, dtype=float)
for comp, r_conc, _ in concentrations:
well = comp.container
row = well.row - 1
col = well.column - 1
sample_concs[row][col] = r_conc

res = QuantificationProcess._compute_pico_concentration(
sample_concs, size)

if res is not None:
sql_args = []
Expand Down Expand Up @@ -1971,7 +1936,7 @@ def estimate_pool_conc_vol(sample_vols, sample_concs):
return (pool_conc, total_vol)

@staticmethod
def compute_shotgun_pooling_values_eqvol(sample_concs, total_vol=60.0):
def compute_pooling_values_eqvol(sample_concs, total_vol=60.0, **kwargs):
"""Computes molar concentration of libraries from concentration values,
using an even volume per sample
Expand All @@ -1992,9 +1957,9 @@ def compute_shotgun_pooling_values_eqvol(sample_concs, total_vol=60.0):
return sample_vols

@staticmethod
def compute_shotgun_pooling_values_minvol(
sample_concs, sample_fracs=None, floor_vol=100, floor_conc=40,
total_nmol=.01):
def compute_pooling_values_minvol(
sample_concs, sample_fracs=None, floor_vol=2, floor_conc=16,
total=240, total_each=True, vol_constant=1, **kwargs):
"""Computes pooling volumes for samples based on concentration
estimates of nM concentrations (`sample_concs`), taking a minimum
volume of samples below a threshold.
Expand All @@ -2010,7 +1975,7 @@ def compute_shotgun_pooling_values_minvol(
pooling.
Finally, total pooling size is determined by a target nanomolar
quantity (`total_nmol`, default .01). For a perfect 384 sample library,
quantity (`total`, default 240). For a perfect 384 sample library,
in which you had all samples at a concentration of exactly 400 nM and
wanted a total volume of 60 uL, this would be 0.024 nmol.
Expand All @@ -2022,90 +1987,113 @@ def compute_shotgun_pooling_values_minvol(
Parameters
----------
sample_concs: 2D array of float
nM sample concentrations
sample concentrations, with numerator same units as `total`.
sample_fracs: 2D of float, optional
fractional value for each sample (default 1/N)
floor_vol: float, optional
volume (nL) at which samples below floor_conc will be pooled.
volume at which samples below floor_conc will be pooled.
Default: 2
floor_conc: float, optional
minimum value (nM) for pooling at real estimated value. Default: 40
total_nmol : float, optional
total number of nM to have in pool. Default: 0.01
minimum value for pooling at real estimated value. Default: 16
total : float, optional
total quantity (numerator) for pool. Unitless, but could represent
for example ng or nmol. Default: 240
total_each : bool, optional
whether `total` refers to the quantity pooled *per sample*
(default; True) or to the total quantity of the pool.
vol_constant : float, optional
conversion factor between `sample_concs` denominator and output
pooling volume units. E.g. if pooling ng/µL concentrations and
producing µL pool volumes, `vol_constant` = 1. If pooling nM
concentrations and producing nL pool volumes, `vol_constant` =
10**-9. Default: 1
Returns
-------
sample_vols: np.array of floats
the volume of each sample to pool, in the units implied by `vol_constant`
"""
if sample_fracs is None:
sample_fracs = np.ones(sample_concs.shape) / sample_concs.size
sample_fracs = np.ones(sample_concs.shape)

if not total_each:
sample_fracs = sample_fracs / sample_concs.size

# calculate volumetric fractions including floor val
sample_vols = (total_nmol * sample_fracs) / sample_concs
# convert L to nL
sample_vols *= 10**9
sample_vols = (total * sample_fracs) / sample_concs
# convert volume from concentration units to pooling units
sample_vols *= vol_constant
# drop volumes for samples below floor concentration to floor_vol
sample_vols[sample_concs < floor_conc] = floor_vol
return sample_vols

@staticmethod
def adjust_blank_vols(pool_vols, comp_blanks, blank_vol):
    """Set the pooling volume of every blank well to a fixed value

    The adjustment is made in place: `pool_vols` itself is modified and
    the very same array object is returned.

    Parameters
    ----------
    pool_vols: np.array
        The per-well pool volumes
    comp_blanks: np.array of bool
        Boolean array indicating which wells are blanks
    blank_vol: float
        Volume at which to pool blanks

    Returns
    -------
    np.array
        The adjusted per-well pool volumes (same object as `pool_vols`)
    """
    # Mask assignment: only the wells flagged as blanks are overwritten
    pool_vols[comp_blanks] = blank_vol
    return pool_vols

@staticmethod
def select_blanks(pool_vols, raw_concs, comp_blanks, blank_num):
    """Retain only the N most concentrated blanks

    Blank wells that are not among the `blank_num` most concentrated
    blanks get their pooling volume set to 0. Non-blank wells are left
    untouched. The input arrays are not modified.

    Parameters
    ----------
    pool_vols: np.array
        The per-well pool volumes
    raw_concs: np.array of float
        The per-well concentrations
    comp_blanks: np.array of bool
        Boolean array indicating which wells are blanks
    blank_num: int
        The number of blanks N to pool (in order of highest concentration)

    Returns
    -------
    np.array
        A copy of `pool_vols` with the rejected blanks set to 0

    Raises
    ------
    ValueError
        If `blank_num` is negative
        If the three input arrays do not all have the same shape
    """
    if blank_num < 0:
        raise ValueError("blank_num cannot be negative (passed: %s)" %
                         blank_num)

    # A chained `a != b != c` means `a != b and b != c`, which lets a
    # single mismatching array slip through; require all three to be equal.
    if not (comp_blanks.shape == pool_vols.shape == raw_concs.shape):
        raise ValueError("all input arrays must be same shape")

    # (concentration, well index) for every well flagged as a blank
    blanks = [(raw_concs[index], index)
              for index, is_blank in np.ndenumerate(comp_blanks)
              if is_blank]

    # sorted() is stable (also with reverse=True), so blanks with equal
    # concentration keep their original well order
    sorted_blanks = sorted(blanks, key=lambda tup: tup[0], reverse=True)

    adjusted_vols = pool_vols.copy()
    # Everything past the first `blank_num` blanks is not pooled
    for _, idx in sorted_blanks[blank_num:]:
        adjusted_vols[idx] = 0

    return adjusted_vols

@classmethod
def create(cls, user, quantification_process, pool_name, volume,
Expand Down
2 changes: 1 addition & 1 deletion labman/db/support_files/db_patch_manual.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ INSERT INTO qiita.process_type (description) VALUES
('primer template creation'), ('primer working plate creation'),
('sample plating'), ('gDNA extraction'), ('16S library prep'),
('shotgun library prep'), ('quantification'), ('gDNA normalization'),
('pooling'), ('reagent creation'), ('sequencing'), ('compress gDNA plates');
('pooling'), ('reagent creation'), ('sequencing'), ('compressed gDNA plates');

-- Populate the equipment type table
INSERT INTO qiita.equipment_type (description) VALUES
Expand Down
16 changes: 11 additions & 5 deletions labman/db/support_files/populate_test_db.sql
Original file line number Diff line number Diff line change
Expand Up @@ -571,9 +571,10 @@ BEGIN
RETURNING process_id INTO p_pool_process_id;

INSERT INTO qiita.pooling_process (process_id, quantification_process_id, robot_id, destination, pooling_function_data)
VALUES (p_pool_process_id, pg_quant_subprocess_id, proc_robot_id, 1, '{"function": "amplicon", "parameters": {"dna-amount-": 240, "min-val-": 1, "max-val-": 15, "blank-val-": 2}}'::json)
VALUES (p_pool_process_id, pg_quant_subprocess_id, proc_robot_id, 1, '{"function": "amplicon", "parameters": {"total-": 240, "floor-vol-": 2, "floor-conc-": 16}}'::json)
RETURNING pooling_process_id INTO p_pool_subprocess_id;


----------------------------------------
------ SEQUENCING POOLING PROCESS ------
----------------------------------------
Expand Down Expand Up @@ -660,7 +661,7 @@ BEGIN

-- Quantify plate pools
INSERT INTO qiita.concentration_calculation (quantitated_composition_id, upstream_process_id, raw_concentration)
VALUES (p_pool_composition_id, ppg_quant_subprocess_id, 1.5);
VALUES (p_pool_composition_id, ppg_quant_subprocess_id, 25);

-- Pool sequencing run
INSERT INTO qiita.container (container_type_id, latest_upstream_process_id, remaining_volume)
Expand Down Expand Up @@ -721,7 +722,7 @@ BEGIN

SELECT process_type_id INTO gdna_comp_process_type_id
FROM qiita.process_type
WHERE description = 'compress gDNA plates';
WHERE description = 'compressed gDNA plates';

INSERT INTO qiita.process (process_type_id, run_date, run_personnel_id)
VALUES (gdna_comp_process_type_id, '10/25/2017', '[email protected]')
Expand Down Expand Up @@ -964,8 +965,13 @@ BEGIN
VALUES (lib_prep_16s_composition_id, gdna_subcomposition_id, primer_comp_id);

-- Quantification
INSERT INTO qiita.concentration_calculation (quantitated_composition_id, upstream_process_id, raw_concentration, computed_concentration)
VALUES (lib_prep_16s_composition_id, pg_quant_subprocess_id, 1.5, 1.5);
IF idx_row_well <= 7 THEN
INSERT INTO qiita.concentration_calculation (quantitated_composition_id, upstream_process_id, raw_concentration, computed_concentration)
VALUES (lib_prep_16s_composition_id, pg_quant_subprocess_id, 20., 60.6060);
ELSE
INSERT INTO qiita.concentration_calculation (quantitated_composition_id, upstream_process_id, raw_concentration, computed_concentration)
VALUES (lib_prep_16s_composition_id, pg_quant_subprocess_id, 1., 3.0303);
END IF;

-- Pool plate
INSERT INTO qiita.pool_composition_components (output_pool_composition_id, input_composition_id, input_volume, percentage_of_output)
Expand Down
2 changes: 1 addition & 1 deletion labman/db/tests/test_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def test_pool_composition_attributes(self):
exp = {'composition': LibraryPrep16SComposition(1),
'input_volume': 1.0, 'percentage_of_output': 0}
self.assertEqual(obs_comp[0], exp)
self.assertEqual(obs.raw_concentration, 1.5)
self.assertEqual(obs.raw_concentration, 25.0)

def test_primer_set_attributes(self):
obs = PrimerSet(1)
Expand Down
Loading

0 comments on commit a085877

Please sign in to comment.