-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #384 from wasade/push_metadata_to_qiita
Push metadata to qiita
- Loading branch information
Showing
10 changed files
with
260 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from microsetta_private_api.config_manager import SERVER_CONFIG | ||
from qiita_client import QiitaClient | ||
|
||
|
||
# Module-level Qiita client shared by importers of this module. The endpoint,
# client id and client secret are read from the server configuration and
# handed to QiitaClient positionally, in that order.
qclient = QiitaClient(
    *(SERVER_CONFIG[setting] for setting in (
        "qiita_endpoint",
        "qiita_client_id",
        "qiita_client_secret",
    ))
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
from microsetta_private_api.repo.base_repo import BaseRepo | ||
from microsetta_private_api.qiita import qclient | ||
from microsetta_private_api.repo.metadata_repo import retrieve_metadata | ||
from microsetta_private_api.repo.metadata_repo._constants import MISSING_VALUE | ||
|
||
|
||
class QiitaRepo(BaseRepo):
    """Repository for synchronizing Microsetta sample metadata with Qiita."""

    # Qiita study that receives the metadata
    STUDY_ID = 10317

    # In testing on staging with qiita-rc, it was observed that large
    # request bodies failed, so a single call pushes at most this many
    # samples. Call push_metadata_to_qiita repeatedly if more are pending.
    MAX_SAMPLES_PER_PUSH = 1000

    def push_metadata_to_qiita(self, barcodes=None):
        """Attempt to format and push metadata for the set of barcodes

        Only barcodes not currently represented in Qiita will be pushed.

        Parameters
        ----------
        barcodes : Iterable or None
            The list of barcodes to attempt to push. If None, all
            "sample-is-valid", as based on their latest scan, will be
            used.

        Notes
        -----
        We are NOT capturing exceptions from the QiitaClient. These errors
        should all be pathological.

        The Qiita endpoint does not allow adding new categories, so
        metadata columns which do not exist in Qiita are dropped before
        pushing. Categories which exist in Qiita but are absent from the
        formatted metadata are filled with MISSING_VALUE.

        At most MAX_SAMPLES_PER_PUSH samples are pushed per call.

        Returns
        -------
        int
            The number of successfully pushed samples to Qiita
        list
            Any error detail when constructing metadata
        """
        samples_url = '/api/v1/study/%d/samples' % self.STUDY_ID

        if barcodes is None:
            with self._transaction.cursor() as cur:
                # obtain all barcodes, which are part of the AG table,
                # which report as their latest scan being valid

                # staging has site_sampled with "Please select..."
                # and some examples of null source IDs. This is weird, so
                # ignore for now.
                cur.execute("""SELECT ag_kit_barcodes.barcode
                               FROM ag.ag_kit_barcodes
                               INNER JOIN barcodes.barcode_scans USING(barcode)
                               INNER JOIN (
                                   SELECT barcode,
                                          max(scan_timestamp)
                                              AS scan_timestamp_latest
                                   FROM barcodes.barcode_scans
                                   GROUP BY barcode
                               ) AS latest_scan
                               ON barcode_scans.barcode = latest_scan.barcode
                                   AND barcode_scans.scan_timestamp =
                                       latest_scan.scan_timestamp_latest
                               WHERE sample_status='sample-is-valid'
                                   AND site_sampled IS NOT NULL
                                   AND site_sampled != 'Please select...'
                                   AND source_id IS NOT NULL""")

                barcodes = {r[0] for r in cur.fetchall()}
        else:
            barcodes = set(barcodes)

        # determine what samples are already known in qiita, throwing away
        # the "<study id>." prefix carried by each sample name
        samples_in_qiita = {name.split('.', 1)[1]
                            for name in qclient.get(samples_url)}

        # we will only push samples that are not already present, and no
        # more than the per-request cap
        to_push = list(barcodes - samples_in_qiita)
        to_push = to_push[:self.MAX_SAMPLES_PER_PUSH]

        # short circuit if we do not have anything to push
        if not to_push:
            return 0, []

        formatted, error = retrieve_metadata(to_push)
        if len(formatted) == 0:
            return 0, error

        # gather the categories currently used in qiita. we have to have
        # parity with the categories when pushing. This is requested only
        # once we know there is something to push.
        cats_in_qiita = qclient.get(samples_url + '/info')
        cats_in_qiita = set(cats_in_qiita['categories'])

        columns = set(formatted.columns)

        # the qiita endpoint will not allow for adding new categories
        # and we can determine this before we poke qiita.
        # TODO: allow adding new columns to Qiita
        if not cats_in_qiita.issuperset(columns):
            # select with a list: pandas does not accept a set as an
            # indexer. Iterating the frame's columns keeps their order.
            keep = [c for c in formatted.columns if c in cats_in_qiita]
            formatted = formatted[keep]

        # if there are any categories not represented, remark them as
        # missing in the metadata. Sorted so the column order is stable.
        for c in sorted(cats_in_qiita - columns):
            formatted[c] = MISSING_VALUE

        for_qiita = formatted.to_json(orient='index')
        qclient.http_patch(samples_url, data=for_qiita)
        n_pushed = len(formatted)

        return n_pushed, error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from unittest import TestCase, main | ||
from unittest.mock import patch | ||
from microsetta_private_api.repo.transaction import Transaction | ||
from microsetta_private_api.repo.qiita_repo import QiitaRepo | ||
|
||
|
||
class FakeColumns:
    """Lightweight test double for a frame's ``columns`` attribute.

    Iterating yields the labels given at construction; item lookup is
    accepted but always evaluates to None.
    """

    def __init__(self, columns):
        # hold on to the supplied labels exactly as given
        self._labels = columns

    def __getitem__(self, thing):
        # lookups are tolerated and deliberately produce nothing
        return None

    def __iter__(self):
        labels = self._labels
        return iter(labels)
|
||
|
||
class FakeFrame:
    """Test double for the frame-like object handed to the Qiita push.

    It exposes a ``columns`` attribute, reports a length of one and
    serializes to an empty JSON array regardless of arguments.
    """

    def __init__(self, columns):
        wrapped = FakeColumns(columns)
        self.columns = wrapped

    def __len__(self):
        # always report exactly one row
        return 1

    def to_json(self, *args, **kwargs):
        # positional and keyword arguments are accepted and ignored
        return "[]"
|
||
|
||
class AdminTests(TestCase):
    """Tests for QiitaRepo.push_metadata_to_qiita with Qiita calls mocked."""

    @patch('microsetta_private_api.qiita.qclient.get')
    @patch('microsetta_private_api.qiita.qclient.http_patch')
    @patch('microsetta_private_api.repo.qiita_repo.retrieve_metadata')
    def test_push_metadata_to_qiita(self, test_retrieve_metadata,
                                    test_http_patch, test_get):
        """Only barcodes absent from Qiita are formatted and patched."""
        # fake codes
        fecal_valid_barcode = '0x0004801'
        oral_valid_barcode = '0x0015213'
        skin_valid_barcode = '0x0027751'

        blank = 'foobarblank'
        test_barcodes = [fecal_valid_barcode,
                         oral_valid_barcode,
                         skin_valid_barcode]

        failure = [{skin_valid_barcode: ("This barcode is not "
                                         "associated with any surveys "
                                         "matching this template id")}, ]
        # one inserts, one fails
        # using side_effect to change returns
        # https://stackoverflow.com/a/24897297
        test_get.side_effect = [
            ['foo.' + blank,
             'foo.' + oral_valid_barcode, ],  # first .get for samples
            {'categories': ['a', 'b', 'c', 'd']},  # second .get for categories
        ]
        test_http_patch.return_value = []
        test_retrieve_metadata.return_value = (
            FakeFrame(['a', 'b', 'c', 'd']),
            failure
        )

        with Transaction() as t:
            qiita_repo = QiitaRepo(t)
            success, failed = qiita_repo.push_metadata_to_qiita(test_barcodes)

        self.assertEqual(success, 1)
        self.assertEqual(failed, failure)

        # the oral barcode is already in Qiita, so only the other two
        # should have been sent for formatting (order is not guaranteed)
        requested = test_retrieve_metadata.call_args[0][0]
        self.assertCountEqual(requested,
                              [fecal_valid_barcode, skin_valid_barcode])

        # the formatted frame must actually be patched into the study
        test_http_patch.assert_called_once_with(
            '/api/v1/study/10317/samples', data='[]')
|
||
|
||
# Run the unittest runner only when this file is executed as a script.
if '__main__' == __name__:
    main()
Oops, something went wrong.