Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cheek Samples #597

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions microsetta_private_api/admin/admin_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ def list_barcode_query_fields(token_info):
'input': 'select',
'values': {
"Blood (skin prick)": "Blood (skin prick)",
"Cheek": "Cheek",
"Saliva": "Saliva",
"Ear wax": "Ear wax",
"Forehead": "Forehead",
Expand Down
6 changes: 5 additions & 1 deletion microsetta_private_api/api/_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,15 @@ def update_sample_association(account_id, source_id, sample_id, body,
# sample_site will not be present if it's environmental. This will
# default to None if the key is not present
sample_site = body.get('sample_site')

barcode_meta = body.get('barcode_meta')

sample_info = SampleInfo(
sample_id,
sample_datetime,
sample_site,
body["sample_notes"]
body["sample_notes"],
barcode_meta
)

sample_repo.update_info(account_id, source_id, sample_info,
Expand Down
2 changes: 1 addition & 1 deletion microsetta_private_api/api/microsetta_private_api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3531,7 +3531,7 @@ components:
nullable: true
sample_site:
enum: ["Blood (skin prick)", "Saliva", "Ear wax", "Forehead", "Fur", "Hair", "Left hand", "Left leg", "Mouth", "Nares", "Nasal mucus",
"Right hand", "Right leg", "Stool", "Tears", "Torso", "Vaginal mucus", null]
"Right hand", "Right leg", "Stool", "Tears", "Torso", "Vaginal mucus", "Cheek", null]
example: "Stool"
sample_edit_locked:
type: boolean
Expand Down
7 changes: 5 additions & 2 deletions microsetta_private_api/api/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@
'sample_projects': ['American Gut Project'],
'account_id': None,
'source_id': None,
'sample_site': None}
'sample_site': None,
'barcode_meta': {}
}

DUMMY_FILLED_SAMPLE_INFO = {
'sample_barcode': BARCODE,
Expand Down Expand Up @@ -581,7 +583,8 @@ def create_dummy_sample_objects(filled=False):
info_dict["sample_id"],
datetime_obj,
info_dict["sample_site"],
info_dict["sample_notes"]
info_dict["sample_notes"],
{}
)

sample = Sample(info_dict["sample_id"],
Expand Down
1 change: 1 addition & 0 deletions microsetta_private_api/api/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,7 @@ def _test_edit_sample_info(self, source_type):
if store_sample_site:
fuzzy_info['sample_site'] = "Tears"
fuzzy_info['sample_datetime'] = datetime.datetime.utcnow()
fuzzy_info['barcode_meta'] = {}

# Many fields are not writable, each should individually cause failure.
readonly_fields = [
Expand Down
23 changes: 23 additions & 0 deletions microsetta_private_api/db/patches/0144.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Beginning with cheek samples, we're collecting metadata that are explicitly
-- linked to sample collection (unlike surveys, which are implicitly linked
-- to samples via sources), but not globally collected, and therefore don't
-- belong in the ag.ag_kit_barcodes table. A new table will store these
-- fields and could eventually be extended to a much more robust framework.

-- First, we need to set up an ENUM type to enforce values for the type of
-- product the participant last used to wash their face
CREATE TYPE SAMPLE_SITE_LAST_WASHED_PRODUCT_TYPE AS ENUM ('Soap (includes bar and liquid soap)', 'Foaming face wash', 'Face cleanser', 'Just water', 'Other (e.g. shampoo, body wash, all-in-one or all-over wash)', 'Not sure');

-- Then, create the table to store the data
-- Note: the date and time are stored separately because we're not enforcing
-- either as a required field. As such, using a timestamp type would not be
-- appropriate since it forces us into a both or neither paradigm.
CREATE TABLE ag.ag_kit_barcodes_cheek (
ag_kit_barcode_id UUID NOT NULL PRIMARY KEY,
sample_site_last_washed_date DATE,
sample_site_last_washed_time TIME,
sample_site_last_washed_product SAMPLE_SITE_LAST_WASHED_PRODUCT_TYPE,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this allowed to be null? Under what circumstances would that be the case?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, all three of the questions are (highly) encouraged but strictly optional. As such, we allow them to be nullable in the database.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok ... now I'm wondering what the meaningful difference is between (a) a completed cheek sample with no record in the ag_kit_barcodes_cheek table and (b) a completed cheek sample with a record in the ag_kit_barcodes_cheek table for which all fields except the foreign key back to the ag_kit_barcodes table are NULL. Do these two cases have two distinct meanings? What is signified by a completely empty (except for FK) record in the cheek table?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a metadata perspective, they do have distinct meanings. A cheek sample with a record whose values are NULL would be reported as "not provided," while a cheek sample with no record at all for the questions (not even NULL values) would be reported as "not collected."

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, that's fascinating--I don't think I really understand what these differences mean in this context, but good to know!


-- Foreign key relationship on ag_kit_barcode_id
CONSTRAINT fk_ag_kit_barcode_id FOREIGN KEY (ag_kit_barcode_id) REFERENCES ag.ag_kit_barcodes (ag_kit_barcode_id)
);
15 changes: 13 additions & 2 deletions microsetta_private_api/model/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ def __init__(self, sample_id, datetime_collected, site, notes, barcode,
self.account_id = account_id

self.accession_urls = []
self.barcode_meta = {}
self.kit_id = kit_id

def set_accession_urls(self, accession_urls):
self.accession_urls = accession_urls

def set_barcode_meta(self, barcode_meta):
self.barcode_meta = barcode_meta

@property
def edit_locked(self):
# If a sample has been scanned and is valid, it is locked.
Expand Down Expand Up @@ -74,18 +78,25 @@ def to_api(self):
"account_id": self.account_id,
"sample_projects": list(self.sample_projects),
"accession_urls": self.accession_urls,
"kit_id": self.kit_id
"kit_id": self.kit_id,
"barcode_meta": self.barcode_meta
}


# A SampleInfo represents the set of end user editable fields whose lifetime
# matches that of the association between a sample and a source
class SampleInfo:
def __init__(self, sample_id, datetime_collected, site, notes):
def __init__(self, sample_id, datetime_collected, site, notes,
barcode_meta=None):
cassidysymons marked this conversation as resolved.
Show resolved Hide resolved
self.id = sample_id
# NB: datetime_collected may be None if sample not yet used
self.datetime_collected = datetime_collected
# NB: notes may be None
self.notes = notes
# NB: site may be None if sample not yet used
self.site = site

if barcode_meta is None:
self.barcode_meta = {}
else:
self.barcode_meta = barcode_meta
15 changes: 13 additions & 2 deletions microsetta_private_api/repo/metadata_repo/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@
'host_body_habitat': 'UBERON:feces',
'env_material': 'feces',
'env_package': 'human-gut',
'description': 'American Gut Project Stool sample',
'description': 'Microsetta Initiative Stool sample',
'host_body_site': 'UBERON:feces'},
'Forehead': {
'host_body_product': 'UBERON:sebum',
Expand Down Expand Up @@ -356,7 +356,18 @@
'env_package': 'human-associated',
'description': 'American Gut Project Ear wax sample',
'empo_3': 'Animal secretion',
'host_body_site': 'UBERON:external auditory meatus'}
'host_body_site': 'UBERON:external auditory meatus'},
'Cheek': {
'host_body_product': 'UBERON:sebum',
'sample_type': 'skin of cheek',
'scientific_name': 'human skin metagenome',
'taxon_id': '539655',
'host_body_habitat': 'UBERON:skin',
'empo_3': 'Animal surface',
'env_material': 'sebum material',
'env_package': 'human-skin',
'description': 'Microsetta Initiative cheek skin sample',
'host_body_site': 'UBERON:skin of cheek'}
}
cassidysymons marked this conversation as resolved.
Show resolved Hide resolved


Expand Down
4 changes: 4 additions & 0 deletions microsetta_private_api/repo/metadata_repo/_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,10 @@ def _to_pandas_series(metadata, multiselect_map):
index.append(shortname)
values.append(answer)

for variable, value in sample_detail.barcode_meta.items():
index.append(variable)
values.append(value)

for variable, value in sample_invariants.items():
index.append(variable)
values.append(value)
Expand Down
23 changes: 15 additions & 8 deletions microsetta_private_api/repo/metadata_repo/tests/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ def setUp(self):
"sample": MM({
"sample_projects": ["American Gut Project"],
"datetime_collected": "2013-10-15T09:30:00",
"site": "Stool"
"site": "Stool",
"barcode_meta": {
"sample_site_last_washed_date": "01/10/2025"
}
}),
'survey_answers': [
{'template': 1,
Expand Down Expand Up @@ -131,7 +134,8 @@ def setUp(self):
"sample": MM({
"sample_projects": ["American Gut Project"],
"datetime_collected": "2013-10-15T09:30:00",
"site": "Stool"
"site": "Stool",
"barcode_meta": {}
}),
'survey_answers': [
{'template': 1,
Expand Down Expand Up @@ -167,7 +171,8 @@ def setUp(self):
"sample": MM({
"sample_projects": ["American Gut Project"],
"datetime_collected": "2013-10-15T09:30:00",
"site": "Stool"
"site": "Stool",
"barcode_meta": {}
}),
'survey_answers': [
{'template': SurveyTemplateRepo.DIET_ID,
Expand Down Expand Up @@ -377,13 +382,13 @@ def test_to_pandas_dataframe(self):
'true', 'true', 'false', 'false',
UNSPECIFIED,
'okay', 'No', "2013-10-15T09:30:00", '000004216',
'US:CA', 'CA', '33', '-117'],
'US:CA', 'CA', '33', '-117', '01/10/2025'],
['XY0004216', 'bar', 'Vegan foo', 'Yes',
UNSPECIFIED, UNSPECIFIED, UNSPECIFIED,
'No', 'false', 'true', 'true', 'false',
'foobar', UNSPECIFIED, UNSPECIFIED,
"2013-10-15T09:30:00", 'XY0004216',
'US:CA', 'CA', '33', '-117']],
'US:CA', 'CA', '33', '-117', 'not provided']],
columns=['sample_name', 'host_subject_id',
'diet_type', 'multivitamin',
'probiotic_frequency',
Expand All @@ -396,7 +401,8 @@ def test_to_pandas_dataframe(self):
'sample2specific', 'abc', 'def',
'collection_timestamp',
'anonymized_name', 'geo_loc_name',
'state', 'latitude', 'longitude']
'state', 'latitude', 'longitude',
'sample_site_last_washed_date']
).set_index('sample_name')

for k, v in HUMAN_SITE_INVARIANTS['Stool'].items():
Expand Down Expand Up @@ -424,15 +430,16 @@ def test_to_pandas_series(self):
values = ['foo', '', 'No', 'Unspecified', 'Unspecified',
'Unspecified', 'No', 'true', 'true', 'false',
'false', 'okay', 'No',
'2013-10-15T09:30:00', 'US:CA', 'CA', '33', '-117']
'2013-10-15T09:30:00', 'US:CA', 'CA', '33', '-117',
'01/10/2025']
index = ['HOST_SUBJECT_ID', 'DIET_TYPE', 'MULTIVITAMIN',
'PROBIOTIC_FREQUENCY', 'VITAMIN_B_SUPPLEMENT_FREQUENCY',
'VITAMIN_D_SUPPLEMENT_FREQUENCY',
'OTHER_SUPPLEMENT_FREQUENCY',
'ALLERGIC_TO_blahblah', 'ALLERGIC_TO_stuff', 'ALLERGIC_TO_x',
'ALLERGIC_TO_baz', 'abc', 'def',
'COLLECTION_TIMESTAMP', 'GEO_LOC_NAME', 'STATE', 'LATITUDE',
'LONGITUDE']
'LONGITUDE', 'sample_site_last_washed_date']

for k, v in HUMAN_SITE_INVARIANTS['Stool'].items():
values.append(v)
Expand Down
Loading