Skip to content

Commit

Permalink
Fixes #431 and provides supporting code that can also be used in futu…
Browse files Browse the repository at this point in the history
…re for #483
  • Loading branch information
AmandaBirmingham committed May 7, 2019
1 parent 02ed687 commit 67d5c31
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 96 deletions.
65 changes: 61 additions & 4 deletions labcontrol/db/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -2998,6 +2998,51 @@ def _format_sample_sheet_comments(principal_investigator=None,

return ''.join(comments)

@staticmethod
def _set_control_values_to_plate_value(input_df, plate_col_name,
                                       value_col_name):
    """Fill null values in a column with their plate's single value.

    Rows whose value in `value_col_name` is null are treated as controls;
    all other rows are treated as non-controls. For every distinct value
    in `plate_col_name`, the non-control rows of that plate must share
    exactly one value in `value_col_name`; that value is then written
    into the plate's control rows.

    All plates are validated before anything is written, so `input_df`
    is left untouched if any plate fails validation.

    Parameters
    ----------
    input_df : pandas.DataFrame
        The dataframe to update; it is modified in place.
    plate_col_name : str
        Name of the column identifying the plate of each row.
    value_col_name : str
        Name of the column whose null entries should be filled.

    Returns
    -------
    pandas.DataFrame
        The same `input_df` object, with control rows filled in.

    Raises
    ------
    ValueError
        If any plate has zero or more than one distinct non-null value
        in `value_col_name`. The message holds one line per bad plate.
    """
    # mask of all the NON-control rows (non-null value) in the whole df
    non_controls_mask = input_df[value_col_name].notnull()

    problem_plate_messages = []
    # (controls mask, fill value) pairs; applied only after every plate
    # has passed validation so a failure never leaves a half-updated df
    pending_fills = []

    for curr_unique_plate in input_df[plate_col_name].unique():
        # mask of all the rows belonging to this plate
        plate_mask = input_df[plate_col_name] == curr_unique_plate

        # distinct values found in this plate's non-control rows
        curr_unique_values = input_df.loc[
            plate_mask & non_controls_mask, value_col_name].unique()

        if len(curr_unique_values) != 1:
            problem_plate_messages.append(
                "Expected one unique value for plate '{0}' "
                "but received {1}: {2}".format(
                    curr_unique_plate, len(curr_unique_values),
                    ", ".join([str(x) for x in curr_unique_values])))
            continue

        # rows of this plate holding the control value (null);
        # ~ "nots" a whole series
        plate_controls_mask = plate_mask & (~non_controls_mask)

        # ok to just take the first non-control value because we
        # verified above that there is only one value there anyway
        pending_fills.append((plate_controls_mask, curr_unique_values[0]))

    if problem_plate_messages:
        raise ValueError("\n".join(problem_plate_messages))

    for plate_controls_mask, plate_value in pending_fills:
        input_df.loc[plate_controls_mask, value_col_name] = plate_value

    return input_df

def _format_sample_sheet(self, data, sep=','):
"""Formats Illumina-compatible sample sheet.
Expand Down Expand Up @@ -3608,6 +3653,9 @@ def _generate_amplicon_prep_information(self):
# for content
data[curr_prep_sheet_id][content] = result

plate_col_name = 'Sample_Plate'
proj_col_name = 'Project_name'

# converting from dict to pandas and then to tsv
for curr_prep_sheet_id, vals in data.items():
df = pd.DataFrame.from_dict(vals, orient='index')
Expand All @@ -3629,14 +3677,14 @@ def _generate_amplicon_prep_information(self):
# 1/3. renaming columns so they match expected casing
mv = {
'barcode': 'BARCODE', 'master_mix': 'MasterMix_lot',
'platform': 'PLATFORM', 'sample_plate': 'Sample_Plate',
'platform': 'PLATFORM', 'sample_plate': plate_col_name,
'run_prefix': 'RUN_PREFIX', 'primer_date': 'Primer_date',
'extraction_robot': 'Extraction_robot',
'runid': 'RUNID', 'epmotion_tm50_8_tool': 'TM50_8_tool',
'library_construction_protocol':
'LIBRARY_CONSTRUCTION_PROTOCOL',
'plating': 'Plating', 'linker': 'LINKER',
'project_name': 'Project_name', 'orig_name2': 'Orig_name',
'project_name': proj_col_name, 'orig_name2': 'Orig_name',
'well_id': 'Well_ID', 'water_lot': 'Water_Lot',
'well_description': 'Well_description',
'run_center': 'RUN_CENTER',
Expand All @@ -3657,16 +3705,25 @@ def _generate_amplicon_prep_information(self):
# final output.
df.drop(['orig_name'], axis=1)

# Ensure that each sample plate included in the sequencing run contains
# experimental samples from exactly one qiita study (no more and no
# fewer); assuming this is true, set the project column value
# for each non-experimental sample to the project name of the
# (single) qiita study found on that non-experimental
# sample's plate.
df = self._set_control_values_to_plate_value(df, plate_col_name,
proj_col_name)

# 2/3. sorting rows
rows_order = ['Sample_Plate', 'row_num', 'col_num']
rows_order = [plate_col_name, 'row_num', 'col_num']
df.sort_values(by=rows_order, inplace=True)
# 3/3. sorting and keeping only required columns
order = [
'BARCODE', 'PRIMER', 'Primer_Plate', 'Well_ID', 'Plating',
'ExtractionKit_lot', 'Extraction_robot', 'TM1000_8_tool',
'Primer_date', 'MasterMix_lot', 'Water_Lot',
'Processing_robot', 'TM300_8_tool', 'TM50_8_tool',
'Sample_Plate', 'Project_name', 'Orig_name',
plate_col_name, proj_col_name, 'Orig_name',
'Well_description', 'EXPERIMENT_DESIGN_DESCRIPTION',
'LIBRARY_CONSTRUCTION_PROTOCOL', 'LINKER', 'PLATFORM',
'RUN_CENTER', 'RUN_DATE', 'RUN_PREFIX', 'pcr_primers',
Expand Down
Loading

0 comments on commit 67d5c31

Please sign in to comment.